<!DOCTYPE html>
<html lang="zh-CN">

<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>arXiv 每日论文精选</title>
    <script src="https://cdn.tailwindcss.com"></script>
    <link href="https://cdn.jsdelivr.net/npm/font-awesome@4.7.0/css/font-awesome.min.css" rel="stylesheet">
    <link rel="stylesheet" href="static/styles.css?v=1761467364">
    <script src="static/tailwind.config.js"></script>

    <style>
        /* 分级折叠功能样式 */
        .collapsed-level-1 .paper-details {
            display: none;
        }
        
        .collapsed-level-2 {
            display: none !important;
        }
        
        /* 展开/折叠图标样式 */
        .expand-icon {
            display: inline-block;
            width: 20px;
            text-align: center;
            margin-right: 5px;
        }
        
        /* 展开/折叠按钮样式 */
        .expand-toggle {
            cursor: pointer;
            padding: 8px 12px;
            background-color: #f3f4f6;
            border: 1px solid #e5e7eb;
            border-radius: 6px;
            margin-bottom: 16px;
            text-align: center;
            font-weight: 500;
            color: #4b5563;
            transition: all 0.2s ease;
        }
        
        .expand-toggle:hover {
            background-color: #e5e7eb;
        }
        
        /* 分割线样式 */
        .papers-divider {
            height: 1px;
            background-color: #e5e7eb;
            margin: 20px 0;
            position: relative;
        }
        
        .papers-divider-label {
            position: absolute;
            left: 50%;
            top: 50%;
            transform: translate(-50%, -50%);
            background-color: white;
            padding: 0 12px;
            color: #9ca3af;
            font-size: 14px;
            cursor: pointer;
        }
        
        .papers-divider-label:hover {
            color: #4b5563;
        }
        
        /* 展开后的样式（折叠类的切换方式示意见本样式块之后的注释） */
        .expanded-all .collapsed-level-1 .paper-details,
        .expanded-all .collapsed-level-2 {
            /* 需 !important 才能覆盖 .collapsed-level-2 上的 display: none !important */
            display: block !important;
        }

        .expanded-level-2 .collapsed-level-2 {
            display: block !important;
        }
    </style>
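    <!--
        折叠交互的示意性草稿（仅供说明，非本页实际脚本）：假设真实逻辑位于 static/ 目录下的 JS 文件中，
        下面的代码仅演示上方定义的折叠类可能如何被切换，其中的选择器与绑定方式均为假设。

        var toggleBtn = document.querySelector('.expand-toggle');
        if (toggleBtn) {
            toggleBtn.addEventListener('click', function () {
                // 展开/收起全部：expanded-all 使 .paper-details 与二级隐藏的卡片同时可见
                document.getElementById('papers-container').classList.toggle('expanded-all');
            });
        }

        var dividerLabel = document.querySelector('.papers-divider-label');
        if (dividerLabel) {
            dividerLabel.addEventListener('click', function () {
                // 仅展开分割线以下被 collapsed-level-2 隐藏的论文卡片
                document.getElementById('papers-container').classList.toggle('expanded-level-2');
            });
        }
    -->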
</head>

<body class="bg-gray-50 font-sans text-dark">
    <!-- 顶部导航与统计信息合并 -->
    <header class="bg-white shadow-sm sticky top-0 z-10 border-b border-gray-200">
        <div class="container mx-auto px-4 py-4">
            <div class="flex flex-col md:flex-row justify-between items-start md:items-center mb-3">
                <div class="flex items-center">
                    <i class="fa fa-book text-primary text-xl mr-2"></i>
                    <h1 class="text-lg md:text-xl font-bold text-gray-800">arXiv 每日论文精选</h1>
                </div>
                <div class="flex items-center mt-2 md:mt-0">
                    <span id="current-date" class="text-gray-600 text-sm">
                        <i class="fa fa-calendar-o mr-1"></i>2025-10-22
                    </span>
                    <div class="ml-3 relative" id="date-picker-container">
                        <button id="date-picker-toggle" class="bg-light border border-gray-300 text-gray-700 py-1 px-3 pr-6 rounded text-sm leading-tight focus:outline-none focus:bg-white inline-flex items-center">
                            <i class="fa fa-calendar mr-2"></i>
                            <span id="selected-date-text">2025-10-22</span>
                            <i class="fa fa-chevron-down ml-2 text-xs"></i>
                        </button>
                        <div id="date-picker" class="hidden absolute right-0 mt-1 bg-white border border-gray-300 rounded shadow-lg p-2 z-20 w-56">
                            <div class="flex justify-between items-center mb-2">
                                <button id="prev-month" class="text-gray-500 hover:text-gray-700"><i class="fa fa-chevron-left"></i></button>
                                <h4 id="current-month">2025-10-22</h4>
                                <button id="next-month" class="text-gray-500 hover:text-gray-700"><i class="fa fa-chevron-right"></i></button>
                            </div>
                            <div class="grid grid-cols-7 gap-1 text-center text-xs mb-1">
                                <div class="text-gray-500">日</div>
                                <div class="text-gray-500">一</div>
                                <div class="text-gray-500">二</div>
                                <div class="text-gray-500">三</div>
                                <div class="text-gray-500">四</div>
                                <div class="text-gray-500">五</div>
                                <div class="text-gray-500">六</div>
                            </div>
                            <div id="calendar-grid" class="grid grid-cols-7 gap-1 text-center text-sm">
                                <!-- 日历格子将通过JavaScript动态生成（示意草稿见下方注释） -->
                            </div>
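                            <!--
                                日历渲染的示意性草稿（仅供说明，非本页实际脚本；真实逻辑假设位于 static/ 目录下的 JS 文件中）：
                                按所选月份在 #calendar-grid 内生成格子，函数名与样式类均为假设示例。

                                function renderCalendar(grid, year, month /* 0-11 */) {
                                    grid.innerHTML = '';
                                    var firstDay = new Date(year, month, 1).getDay();      // 本月 1 号是星期几（0=周日）
                                    var daysInMonth = new Date(year, month + 1, 0).getDate();
                                    for (var i = 0; i < firstDay; i++) {
                                        grid.appendChild(document.createElement('div'));   // 月初占位空格
                                    }
                                    for (var d = 1; d <= daysInMonth; d++) {
                                        var cell = document.createElement('div');
                                        cell.textContent = d;
                                        cell.className = 'cursor-pointer hover:bg-gray-100 rounded';
                                        grid.appendChild(cell);
                                    }
                                }
                                // 用法示例：renderCalendar(document.getElementById('calendar-grid'), 2025, 9); // 渲染 2025 年 10 月
                            -->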
                        </div>
                    </div>
                </div>
            </div>

            <!-- 统计信息 -->
            <div class="flex flex-wrap gap-4 text-sm">
                <div class="flex items-center">
                    <span class="text-gray-500 mr-1"><i class="fa fa-file-text-o"></i> 总论文数:</span>
                    <span id="total-papers" class="font-semibold text-primary">133</span>
                </div>
                <div class="flex items-center">
                    <span class="text-gray-500 mr-1"><i class="fa fa-star"></i> 精选论文数:</span>
                    <span id="selected-papers" class="font-semibold text-accent">20</span>
                </div>
                <div class="flex items-center">
                    <span class="text-gray-500 mr-1"><i class="fa fa-line-chart"></i> 平均评分:</span>
                    <span id="avg-score" class="font-semibold text-secondary">2.6</span>
                </div>
            </div>
        </div>
    </header>

    <!-- 主内容区 -->
    <main class="container mx-auto px-4 py-5">
        <!-- 筛选器 -->
        <div class="mb-4 flex flex-col sm:flex-row justify-between items-start sm:items-center">
            <div class="text-gray-700 text-sm mb-2 sm:mb-0">
                <span id="display-count" class="font-medium">显示 133 篇论文 (共 133 篇)</span>
            </div>
            <div class="flex space-x-2">
                <button id="show-all"
                    class="px-3 py-1 bg-primary text-white rounded text-sm hover:bg-primary/90 transition-colors">
                    全部论文
                </button>
                <button id="show-selected"
                    class="px-3 py-1 bg-gray-200 text-gray-700 rounded text-sm hover:bg-gray-300 transition-colors">
                    仅显示精选
                </button>
            </div>
        </div>

        <!-- 论文列表 -->
        <div id="papers-container" class="grid grid-cols-1 gap-4">
            
<div class="paper-card p-4 expanded">
    <div class="flex justify-between items-start mb-2">
        <h3 class="text-lg font-semibold text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.18527v1" target="_blank" rel="noopener noreferrer">
                <i class="fa fa-star text-yellow-400 mr-1"></i>大语言模型作为稀疏检索器：一种用于第一阶段产品搜索的框架
            </a>
        </h3>
        <span class="score-badge bg-green-100 text-green-800">
            <i class="fa fa-star mr-1"></i>9/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            LLMs as Sparse Retrievers: A Framework for First-Stage Product Search
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Hongru Song, Yu-an Liu, Ruqing Zhang, Jiafeng Guo, Maarten de Rijke, Sen Li, Wen...
        </div>
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-lightbulb-o text-yellow-500 mr-1"></i>核心总结:</strong>
            <p class="text-gray-600 text-sm mt-1">论文研究电商产品搜索中稀疏检索的词汇不匹配问题，核心方法是提出PROSPER框架，通过字面残差网络缓解LLM幻觉并强化关键字面词，以及词汇聚焦窗口实现从粗到细的稀疏化训练策略。</p>
        </div>
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文直接针对搜索领域的核心问题——词汇不匹配，提出利用LLM改进稀疏检索的创新方法，完全符合核心领域进展和直接LLM应用两个重点方向。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-10-21 11:13:21
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2510.18527v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2510.18527v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.IR</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Product search is a crucial component of modern e-commerce platforms, with billions of user queries every day. In product search systems, first-stage retrieval should achieve high recall while ensuring efficient online deployment. Sparse retrieval is particularly attractive in this context due to its interpretability and storage efficiency. However, sparse retrieval methods suffer from severe vocabulary mismatch issues, leading to suboptimal performance in product search scenarios. With their potential for semantic analysis, large language models (LLMs) offer a promising avenue for mitigating vocabulary mismatch issues and thereby improving retrieval quality. Directly applying LLMs to sparse retrieval in product search exposes two key challenges: (1) Queries and product titles are typically short and highly susceptible to LLM-induced hallucinations, such as generating irrelevant expansion terms or underweighting critical literal terms like brand names and model numbers; (2) The large vocabulary space of LLMs leads to difficulty in initializing training effectively, making it challenging to learn meaningful sparse representations in such ultra-high-dimensional spaces. To address these challenges, we propose PROSPER, a framework for PROduct search leveraging LLMs as SParsE Retrievers. PROSPER incorporates: (1) a literal residual network that alleviates hallucination in lexical expansion by reinforcing underweighted literal terms through a residual compensation mechanism; and (2) a lexical focusing window that facilitates effective training initialization via a coarse-to-fine sparsification strategy. Extensive offline and online experiments show that PROSPER significantly outperforms sparse baselines and achieves recall performance comparable to advanced dense retrievers, while also achieving revenue increments online.
                </div>
            </details>
    </div>
</div>
<div class="paper-card p-4 expanded">
    <div class="flex justify-between items-start mb-2">
        <h3 class="text-lg font-semibold text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.18239v1" target="_blank" rel="noopener noreferrer">
                <i class="fa fa-star text-yellow-400 mr-1"></i>LIME：基于链接的用户-物品交互建模与解耦异或注意力机制，实现高效测试时扩展
            </a>
        </h3>
        <span class="score-badge bg-green-100 text-green-800">
            <i class="fa fa-star mr-1"></i>9/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            LIME: Link-based user-item Interaction Modeling with decoupled xor attention for Efficient test time scaling
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Yunjiang Jiang, Ayush Agarwal, Yang Liu, Bi Xue
        </div>
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-lightbulb-o text-yellow-500 mr-1"></i>核心总结:</strong>
            <p class="text-gray-600 text-sm mt-1">论文研究推荐系统中Transformer模型在长用户序列和大候选集下的计算效率瓶颈问题。核心方法是采用低秩链接嵌入实现注意力权重的预计算，结合线性注意力机制LIME-XOR，将用户序列的复杂度从二次降为线性，使推理成本几乎独立于候选集大小。</p>
        </div>
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文直接针对推荐系统的核心效率瓶颈，通过创新的注意力机制和预计算技术，在保持性能的同时大幅降低推理成本，完全契合效率优化和Transformer架构改进的研究方向。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-10-21 02:53:17
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2510.18239v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2510.18239v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.IR</span><span class="category-tag">cs.LG</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Scaling large recommendation systems requires advancing three major frontiers: processing longer user histories, expanding candidate sets, and increasing model capacity. While promising, transformers' computational cost scales quadratically with the user sequence length and linearly with the number of candidates. This trade-off makes it prohibitively expensive to expand candidate sets or increase sequence length at inference, despite the significant performance improvements. We introduce LIME, a novel architecture that resolves this trade-off. Through two key innovations, LIME fundamentally reduces computational complexity. First, low-rank "link embeddings" enable pre-computation of attention weights by decoupling user and candidate interactions, making the inference cost nearly independent of candidate set size. Second, a linear attention mechanism, LIME-XOR, reduces the complexity with respect to user sequence length from quadratic (O(N²)) to linear (O(N)). Experiments on public and industrial datasets show LIME achieves near-parity with state-of-the-art transformers but with a 10x inference speedup on large candidate sets or long sequence lengths. When tested on a major recommendation platform, LIME improved user engagement while maintaining minimal inference costs with respect to candidate set size and user history length, establishing a new paradigm for efficient and expressive recommendation systems.
                </div>
            </details>
    </div>
</div>
<div class="paper-card p-4 expanded">
    <div class="flex justify-between items-start mb-2">
        <h3 class="text-lg font-semibold text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.18866v1" target="_blank" rel="noopener noreferrer">
                <i class="fa fa-star text-yellow-400 mr-1"></i>LightMem：轻量级高效记忆增强生成
            </a>
        </h3>
        <span class="score-badge bg-green-100 text-green-800">
            <i class="fa fa-star mr-1"></i>9/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            LightMem: Lightweight and Efficient Memory-Augmented Generation
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Jizhan Fang, Xinle Deng, Haoming Xu, Ziyan Jiang, Yuqi Tang, Ziwen Xu, Shumin De...
        </div>
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-lightbulb-o text-yellow-500 mr-1"></i>核心总结:</strong>
            <p class="text-gray-600 text-sm mt-1">论文研究LLM在动态复杂环境中有效利用历史交互信息的问题，核心思想是借鉴人类记忆的Atkinson-Shiffrin模型，构建包含感官记忆、主题感知短期记忆和睡眠时间更新的长期记忆的三阶段轻量化内存系统。</p>
        </div>
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文直接解决LLM在动态环境中利用历史交互信息的关键问题，其轻量化内存架构和效率优化方法对推荐系统和搜索应用具有重要价值。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-10-21 17:58:17
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2510.18866v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2510.18866v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CL</span><span class="category-tag">cs.AI</span><span class="category-tag">cs.CV</span><span class="category-tag">cs.LG</span><span class="category-tag">cs.MA</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Despite their remarkable capabilities, Large Language Models (LLMs) struggle to effectively leverage historical interaction information in dynamic and complex environments. Memory systems enable LLMs to move beyond stateless interactions by introducing persistent information storage, retrieval, and utilization mechanisms. However, existing memory systems often introduce substantial time and computational overhead. To this end, we introduce a new memory system called LightMem, which strikes a balance between the performance and efficiency of memory systems. Inspired by the Atkinson-Shiffrin model of human memory, LightMem organizes memory into three complementary stages. First, cognition-inspired sensory memory rapidly filters irrelevant information through lightweight compression and groups information according to their topics. Next, topic-aware short-term memory consolidates these topic-based groups, organizing and summarizing content for more structured access. Finally, long-term memory with sleep-time update employs an offline procedure that decouples consolidation from online inference. Experiments on LongMemEval with GPT and Qwen backbones show that LightMem outperforms strong baselines in accuracy (up to 10.9% gains) while reducing token usage by up to 117x, API calls by up to 159x, and runtime by over 12x. The code is available at https://github.com/zjunlp/LightMem.
                </div>
            </details>
    </div>
</div>
<div class="paper-card p-4 expanded">
    <div class="flex justify-between items-start mb-2">
        <h3 class="text-lg font-semibold text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.18830v1" target="_blank" rel="noopener noreferrer">
                <i class="fa fa-star text-yellow-400 mr-1"></i>MTraining：用于高效超长上下文训练的分布式动态稀疏注意力机制
            </a>
        </h3>
        <span class="score-badge bg-green-100 text-green-800">
            <i class="fa fa-star mr-1"></i>9/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            MTraining: Distributed Dynamic Sparse Attention for Efficient Ultra-Long Context Training
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Wenxuan Li, Chengruidong Zhang, Huiqiang Jiang, Yucheng Li, Yuqing Yang, Lili Qi...
        </div>
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-lightbulb-o text-yellow-500 mr-1"></i>核心总结:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文研究超长上下文LLM训练的计算效率问题，核心方法是提出MTraining分布式框架，通过动态稀疏训练模式、平衡稀疏环注意力和层次稀疏环注意力协同解决计算不平衡和通信开销。</p>
        </div>
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文直接针对Transformer架构效率提升，提出分布式动态稀疏注意力方法，对超长上下文训练具有重要价值，与LLM效率技术高度相关。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-10-21 17:25:32
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2510.18830v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2510.18830v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CL</span><span class="category-tag">cs.DC</span><span class="category-tag">cs.LG</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    The adoption of long context windows has become a standard feature in Large Language Models (LLMs), as extended contexts significantly enhance their capacity for complex reasoning and broaden their applicability across diverse scenarios. Dynamic sparse attention is a promising approach for reducing the computational cost of long-context. However, efficiently training LLMs with dynamic sparse attention on ultra-long contexts-especially in distributed settings-remains a significant challenge, due in large part to worker- and step-level imbalance. This paper introduces MTraining, a novel distributed methodology leveraging dynamic sparse attention to enable efficient training for LLMs with ultra-long contexts. Specifically, MTraining integrates three key components: a dynamic sparse training pattern, balanced sparse ring attention, and hierarchical sparse ring attention. These components are designed to synergistically address the computational imbalance and communication overheads inherent in dynamic sparse attention mechanisms during the training of models with extensive context lengths. We demonstrate the efficacy of MTraining by training Qwen2.5-3B, successfully expanding its context window from 32K to 512K tokens on a cluster of 32 A100 GPUs. Our evaluations on a comprehensive suite of downstream tasks, including RULER, PG-19, InfiniteBench, and Needle In A Haystack, reveal that MTraining achieves up to a 6x higher training throughput while preserving model accuracy. Our code is available at https://github.com/microsoft/MInference/tree/main/MTraining.
                </div>
            </details>
    </div>
</div>
<div class="paper-card p-4 expanded">
    <div class="flex justify-between items-start mb-2">
        <h3 class="text-lg font-semibold text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.18413v1" target="_blank" rel="noopener noreferrer">
                <i class="fa fa-star text-yellow-400 mr-1"></i>Adamas：用于高效长上下文推理的哈达玛稀疏注意力
            </a>
        </h3>
        <span class="score-badge bg-green-100 text-green-800">
            <i class="fa fa-star mr-1"></i>9/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            Adamas: Hadamard Sparse Attention for Efficient Long-Context Inference
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Siyuan Yan, Guo-Qing Jiang, Yuchen Zhang, Xiaoxing Ma, Ran Zhu, Chun Cao, Jingwe...
        </div>
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-lightbulb-o text-yellow-500 mr-1"></i>核心总结:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文研究长上下文Transformer推理中的二次方注意力计算成本问题，核心思想是使用Hadamard变换、分桶压缩和曼哈顿距离估计来实现高效的top-k稀疏注意力选择。</p>
        </div>
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文提出了一种高效的稀疏注意力机制，直接解决了Transformer架构在长上下文推理中的计算瓶颈，对推荐系统和搜索中的长序列建模具有重要应用价值。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-10-21 08:44:47
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2510.18413v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2510.18413v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CL</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Large language models (LLMs) now support context windows of hundreds of thousands to millions of tokens, enabling applications such as long-document summarization, large-scale code synthesis, multi-document question answering and persistent multi-turn dialogue. However, such extended contexts exacerbate the quadratic cost of self-attention, leading to severe latency in autoregressive decoding. Existing sparse attention methods alleviate these costs but rely on heuristic patterns that struggle to recall critical key-value (KV) pairs for each query, resulting in accuracy degradation. We introduce Adamas, a lightweight yet highly accurate sparse attention mechanism designed for long-context inference. Adamas applies the Hadamard transform, bucketization and 2-bit compression to produce compact representations, and leverages Manhattan-distance estimation for efficient top-k selections. Experiments show that Adamas matches the accuracy of full attention with only a 64-token budget, achieves near-lossless performance at 128, and supports up to 8x higher sparsity than prior state-of-the-art (SOTA) methods while delivering up to 4.4x self-attention and 1.5x end-to-end speedups on 32K-length sequences. Remarkably, Adamas attains comparable or even lower perplexity than full attention, underscoring its effectiveness in maintaining accuracy under aggressive sparsity.
                </div>
            </details>
    </div>
</div>
<div class="paper-card p-4 expanded">
    <div class="flex justify-between items-start mb-2">
        <h3 class="text-lg font-semibold text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.18364v1" target="_blank" rel="noopener noreferrer">
                <i class="fa fa-star text-yellow-400 mr-1"></i>评估基于大语言模型的移动应用推荐：一项实证研究
            </a>
        </h3>
        <span class="score-badge bg-green-100 text-green-800">
            <i class="fa fa-star mr-1"></i>8/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            Evaluating LLM-Based Mobile App Recommendations: An Empirical Study
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Quim Motger, Xavier Franch, Vincenzo Gervasi, Jordi Marco
        </div>
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-lightbulb-o text-yellow-500 mr-1"></i>核心总结:</strong>
            <p class="text-gray-600 text-sm mt-1">论文研究LLM如何生成和排序移动应用推荐的核心问题，核心方法是构建排名标准分类法和评估框架来分析LLM推荐的一致性和对指令的响应性。</p>
        </div>
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文直接评估LLM在移动应用推荐中的实际应用，属于LLM在推荐系统的直接应用领域，并提供了系统性的评估框架和排名标准分类。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-10-21 07:35:19
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2510.18364v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2510.18364v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.IR</span><span class="category-tag">cs.SE</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Large Language Models (LLMs) are increasingly used to recommend mobile applications through natural language prompts, offering a flexible alternative to keyword-based app store search. Yet, the reasoning behind these recommendations remains opaque, raising questions about their consistency, explainability, and alignment with traditional App Store Optimization (ASO) metrics. In this paper, we present an empirical analysis of how widely-used general purpose LLMs generate, justify, and rank mobile app recommendations. Our contributions are: (i) a taxonomy of 16 generalizable ranking criteria elicited from LLM outputs; (ii) a systematic evaluation framework to analyse recommendation consistency and responsiveness to explicit ranking instructions; and (iii) a replication package to support reproducibility and future research on AI-based recommendation systems. Our findings reveal that LLMs rely on a broad yet fragmented set of ranking criteria, only partially aligned with standard ASO metrics. While top-ranked apps tend to be consistent across runs, variability increases with ranking depth and search specificity. LLMs exhibit varying sensitivity to explicit ranking instructions - ranging from substantial adaptations to near-identical outputs - highlighting their complex reasoning dynamics in conversational app discovery. Our results aim to support end-users, app developers, and recommender-systems researchers in navigating the emerging landscape of conversational app discovery.
                </div>
            </details>
    </div>
</div>
<div class="paper-card p-4 expanded">
    <div class="flex justify-between items-start mb-2">
        <h3 class="text-lg font-semibold text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.18876v1" target="_blank" rel="noopener noreferrer">
                <i class="fa fa-star text-yellow-400 mr-1"></i>抓取任意区域：面向多模态大语言模型的精确、上下文像素理解
            </a>
        </h3>
        <span class="score-badge bg-green-100 text-green-800">
            <i class="fa fa-star mr-1"></i>8/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            Grasp Any Region: Towards Precise, Contextual Pixel Understanding for Multimodal LLMs
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Haochen Wang, Yuhao Wang, Tao Zhang, Yikang Zhou, Yanwei Li, Jiacong Wang, Ye Ti...
        </div>
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-lightbulb-o text-yellow-500 mr-1"></i>核心总结:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文研究多模态大语言模型在复杂场景中难以进行细粒度区域理解的问题，核心思想是通过RoI对齐特征重放技术，在利用全局上下文的同时支持多提示交互建模，实现从被动描述到主动对话的区域级视觉理解。</p>
        </div>
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文提出的区域感知多模态理解和上下文建模方法直接适用于推荐系统中处理复杂用户行为序列和异构特征，其多提示交互建模可类比推荐中的多目标优化。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-10-21 17:59:59
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2510.18876v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2510.18876v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CV</span><span class="category-tag">cs.AI</span><span class="category-tag">cs.CL</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    While Multimodal Large Language Models (MLLMs) excel at holistic understanding, they struggle in capturing the dense world with complex scenes, requiring fine-grained analysis of intricate details and object inter-relationships. Region-level MLLMs have been a promising step. However, previous attempts are generally optimized to understand given regions in isolation, neglecting crucial global contexts. To address this, we introduce Grasp Any Region (GAR) for comprehensive region-level visual understanding. Empowered by an effective RoI-aligned feature replay technique, GAR supports (1) precise perception by leveraging necessary global contexts, and (2) modeling interactions between multiple prompts. Together, it then naturally achieves (3) advanced compositional reasoning to answer specific free-form questions about any region, shifting the paradigm from passive description to active dialogue. Moreover, we construct GAR-Bench, which not only provides a more accurate evaluation of single-region comprehension, but also, more importantly, measures interactions and complex reasoning across multiple regions. Extensive experiments have demonstrated that GAR-1B not only maintains the state-of-the-art captioning capabilities, e.g., outperforming DAM-3B +4.5 on DLC-Bench, but also excels at modeling relationships between multiple prompts with advanced comprehension capabilities, even surpassing InternVL3-78B on GAR-Bench-VQA. More importantly, our zero-shot GAR-8B even outperforms in-domain VideoRefer-7B on VideoRefer-BenchQ, indicating its strong capabilities can be easily transferred to videos.
                </div>
            </details>
    </div>
</div>
<div class="paper-card p-4 expanded">
    <div class="flex justify-between items-start mb-2">
        <h3 class="text-lg font-semibold text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.18871v1" target="_blank" rel="noopener noreferrer">
                <i class="fa fa-star text-yellow-400 mr-1"></i>大型语言模型如何利用其深度？
            </a>
        </h3>
        <span class="score-badge bg-green-100 text-green-800">
            <i class="fa fa-star mr-1"></i>8/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            How Do LLMs Use Their Depth?
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Akshat Gupta, Jay Yeung, Gopala Anumanchipalli, Anna Ivanova
        </div>
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-lightbulb-o text-yellow-500 mr-1"></i>核心总结:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文研究LLM如何在其深度层次中结构化计算以进行预测，核心提出'猜测-精炼'框架：早期层基于高频词汇进行统计猜测，深层利用上下文信息逐步精炼预测，揭示了Transformer模型分层计算的内在机制。</p>
        </div>
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文揭示了LLM内部计算结构的分层动态机制，直接关联Transformer架构效率优化和LLM核心工作机制理解，对改进推荐和搜索系统的计算效率具有重要启发价值。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-10-21 17:59:05
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2510.18871v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2510.18871v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CL</span><span class="category-tag">cs.AI</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Growing evidence suggests that large language models do not use their depth uniformly, yet we still lack a fine-grained understanding of their layer-wise prediction dynamics. In this paper, we trace the intermediate representations of several open-weight models during inference and reveal a structured and nuanced use of depth. Specifically, we propose a "Guess-then-Refine" framework that explains how LLMs internally structure their computations to make predictions. We first show that the top-ranked predictions in early LLM layers are composed primarily of high-frequency tokens, which act as statistical guesses proposed by the model early on due to the lack of appropriate contextual information. As contextual information develops deeper into the model, these initial guesses get refined into contextually appropriate tokens. Even high-frequency token predictions from early layers get refined >70% of the time, indicating that correct token prediction is not "one-and-done". We then go beyond frequency-based prediction to examine the dynamic usage of layer depth across three case studies. (i) Part-of-speech analysis shows that function words are, on average, the earliest to be predicted correctly. (ii) Fact recall task analysis shows that, in a multi-token answer, the first token requires more computational depth than the rest. (iii) Multiple-choice task analysis shows that the model identifies the format of the response within the first half of the layers, but finalizes its response only toward the end. Together, our results provide a detailed view of depth usage in LLMs, shedding light on the layer-by-layer computations that underlie successful predictions and providing insights for future works to improve computational efficiency in transformer-based models.
                </div>
            </details>
    </div>
</div>
<div class="paper-card p-4 expanded">
    <div class="flex justify-between items-start mb-2">
        <h3 class="text-lg font-semibold text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.18849v1" target="_blank" rel="noopener noreferrer">
                <i class="fa fa-star text-yellow-400 mr-1"></i>基于评论-后编辑强化学习的忠实可控个性化方法研究
            </a>
        </h3>
        <span class="score-badge bg-green-100 text-green-800">
            <i class="fa fa-star mr-1"></i>8/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            Towards Faithful and Controllable Personalization via Critique-Post-Edit Reinforcement Learning
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Chenghao Zhu, Meiling Tao, Tiannan Wang, Dongyi Ding, Yuchen Eleanor Jiang, Wang...
        </div>
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-lightbulb-o text-yellow-500 mr-1"></i>核心总结:</strong>
            <p class="text-gray-600 text-sm mt-1">论文研究LLM忠实个性化对齐用户偏好的核心问题，核心方法是提出Critique-Post-Edit强化学习框架，通过个性化生成奖励模型提供多维评分和文本批评，结合策略模型基于批评自我修订输出来实现更精准的学习。</p>
        </div>
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文直接针对LLM个性化应用的核心挑战，提出新的强化学习框架解决忠实性和可控性问题，与直接LLM应用领域高度相关。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-10-21 17:40:03
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2510.18849v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2510.18849v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CL</span><span class="category-tag">cs.AI</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Faithfully personalizing large language models (LLMs) to align with individual user preferences is a critical but challenging task. While supervised fine-tuning (SFT) quickly reaches a performance plateau, standard reinforcement learning from human feedback (RLHF) also struggles with the nuances of personalization. Scalar-based reward models are prone to reward hacking, which leads to verbose and superficially personalized responses. To address these limitations, we propose Critique-Post-Edit, a robust reinforcement learning framework that enables more faithful and controllable personalization. Our framework integrates two key components: (1) a Personalized Generative Reward Model (GRM) that provides multi-dimensional scores and textual critiques to resist reward hacking, and (2) a Critique-Post-Edit mechanism where the policy model revises its own outputs based on these critiques for more targeted and efficient learning. Under a rigorous length-controlled evaluation, our method substantially outperforms standard PPO on personalization benchmarks. Personalized Qwen2.5-7B achieves an average 11% win-rate improvement, and the personalized Qwen2.5-14B model surpasses the performance of GPT-4.1. These results demonstrate a practical path to faithful, efficient, and controllable personalization.
                </div>
            </details>
    </div>
</div>
<div class="paper-card p-4 expanded">
    <div class="flex justify-between items-start mb-2">
        <h3 class="text-lg font-semibold text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.18798v1" target="_blank" rel="noopener noreferrer">
                <i class="fa fa-star text-yellow-400 mr-1"></i>WebSeer：通过自反思强化学习训练更深层次的搜索智能体
            </a>
        </h3>
        <span class="score-badge bg-green-100 text-green-800">
            <i class="fa fa-star mr-1"></i>8/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            WebSeer: Training Deeper Search Agents through Reinforcement Learning with Self-Reflection
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Guanzhong He, Zhen Yang, Jinxin Liu, Bin Xu, Lei Hou, Juanzi Li
        </div>
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-lightbulb-o text-yellow-500 mr-1"></i>核心总结:</strong>
            <p class="text-gray-600 text-sm mt-1">论文研究搜索代理在交互环境中工具使用深度不足和错误累积的问题，核心方法是通过自反思机制增强强化学习训练，使模型能够生成更长且更具反思性的工具使用轨迹。</p>
        </div>
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文通过强化学习与自反思机制训练搜索代理，直接针对搜索领域的核心问题，其深度工具使用和错误累积解决方案对智能检索系统具有重要价值。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-10-21 16:52:00
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2510.18798v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2510.18798v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CL</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Search agents have achieved significant advancements in enabling intelligent information retrieval and decision-making within interactive environments. Although reinforcement learning has been employed to train agentic models capable of more dynamic interactive retrieval, existing methods are limited by shallow tool-use depth and the accumulation of errors over multiple iterative interactions. In this paper, we present WebSeer, a more intelligent search agent trained via reinforcement learning enhanced with a self-reflection mechanism. Specifically, we construct a large dataset annotated with reflection patterns and design a two-stage training framework that unifies cold start and reinforcement learning within the self-reflection paradigm for real-world web-based environments, which enables the model to generate longer and more reflective tool-use trajectories. Our approach substantially extends tool-use chains and improves answer accuracy. Using a single 14B model, we achieve state-of-the-art results on HotpotQA and SimpleQA, with accuracies of 72.3% and 90.0%, respectively, and demonstrate strong generalization to out-of-distribution datasets. The code is available at https://github.com/99hgz/WebSeer
                </div>
            </details>
    </div>
</div>
<div class="paper-card p-4 expanded">
    <div class="flex justify-between items-start mb-2">
        <h3 class="text-lg font-semibold text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.18257v1" target="_blank" rel="noopener noreferrer">
                <i class="fa fa-star text-yellow-400 mr-1"></i>DelvePO：面向灵活提示优化的方向引导自进化框架
            </a>
        </h3>
        <span class="score-badge bg-green-100 text-green-800">
            <i class="fa fa-star mr-1"></i>8/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            DelvePO: Direction-Guided Self-Evolving Framework for Flexible Prompt Optimization
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Tao Tao, Guanghui Zhu, Lang Guo, Hongyi Chen, Chunfeng Yuan, Yihua Huang
        </div>
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-lightbulb-o text-yellow-500 mr-1"></i>核心总结:</strong>
            <p class="text-gray-600 text-sm mt-1">论文研究提示优化容易陷入局部最优和泛化性差的问题，核心思想是将提示解耦为不同组件，通过工作记忆机制引导LLM自进化生成新提示，实现任务无关的灵活优化。</p>
        </div>
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文提出方向引导的自进化提示优化框架，直接属于LLM应用技术，对搜索和推荐系统中的提示工程有重要价值。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-10-21 03:28:53
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2510.18257v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2510.18257v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CL</span><span class="category-tag">cs.AI</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Prompt Optimization has emerged as a crucial approach due to its capabilities in steering Large Language Models to solve various tasks. However, current works mainly rely on the random rewriting ability of LLMs, and the optimization process generally focuses on specific influencing factors, which makes it easy to fall into a local optimum. Besides, the performance of the optimized prompt is often unstable, which limits its transferability across different tasks. To address the above challenges, we propose DelvePO (Direction-Guided Self-Evolving Framework for Flexible Prompt Optimization), a task-agnostic framework to optimize prompts in a self-evolving manner. In our framework, we decouple prompts into different components that can be used to explore the impact that different factors may have on various tasks. On this basis, we introduce working memory, through which LLMs can alleviate the deficiencies caused by their own uncertainties and further obtain key insights to guide the generation of new prompts. Extensive experiments were conducted on different tasks covering various domains for both open- and closed-source LLMs, including DeepSeek-R1-Distill-Llama-8B, Qwen2.5-7B-Instruct and GPT-4o-mini. Experimental results show that DelvePO consistently outperforms previous SOTA methods under identical experimental settings, demonstrating its effectiveness and transferability across different tasks.
                </div>
            </details>
    </div>
</div>
<div class="paper-card p-4 expanded">
    <div class="flex justify-between items-start mb-2">
        <h3 class="text-lg font-semibold text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.18825v1" target="_blank" rel="noopener noreferrer">
                <i class="fa fa-star text-yellow-400 mr-1"></i>通过分层掩码框架统一和增强图Transformer
            </a>
        </h3>
        <span class="score-badge bg-green-100 text-green-800">
            <i class="fa fa-star mr-1"></i>8/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            Unifying and Enhancing Graph Transformers via a Hierarchical Mask Framework
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Yujie Xing, Xiao Wang, Bin Wu, Hai Huang, Chuan Shi
        </div>
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-lightbulb-o text-yellow-500 mr-1"></i>核心总结:</strong>
            <p class="text-gray-600 text-sm mt-1">论文研究图Transformer架构的统一化问题，核心思想是通过层次化注意力掩码框架揭示模型架构与注意力掩码的等价性，并提出基于专家混合的双重注意力计算机制来自适应整合多层次交互信息。</p>
        </div>
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文提出的统一层次掩码框架和MoE架构直接属于Transformer架构效率优化领域，其自适应注意力机制对推荐系统中的图数据处理具有重要参考价值。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-10-21 17:22:32
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2510.18825v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2510.18825v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Graph Transformers (GTs) have emerged as a powerful paradigm for graph representation learning due to their ability to model diverse node interactions. However, existing GTs often rely on intricate architectural designs tailored to specific interactions, limiting their flexibility. To address this, we propose a unified hierarchical mask framework that reveals an underlying equivalence between model architecture and attention mask construction. This framework enables a consistent modeling paradigm by capturing diverse interactions through carefully designed attention masks. Theoretical analysis under this framework demonstrates that the probability of correct classification positively correlates with the receptive field size and label consistency, leading to a fundamental design principle: an effective attention mask should ensure both a sufficiently large receptive field and a high level of label consistency. While no single existing mask satisfies this principle across all scenarios, our analysis reveals that hierarchical masks offer complementary strengths, motivating their effective integration. Then, we introduce M3Dphormer, a Mixture-of-Experts-based Graph Transformer with Multi-Level Masking and Dual Attention Computation. M3Dphormer incorporates three theoretically grounded hierarchical masks and employs a bi-level expert routing mechanism to adaptively integrate multi-level interaction information. To ensure scalability, we further introduce a dual attention computation scheme that dynamically switches between dense and sparse modes based on local mask sparsity. Extensive experiments across multiple benchmarks demonstrate that M3Dphormer achieves state-of-the-art performance, validating the effectiveness of our unified framework and model design.
                </div>
            </details>
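            <div class="mt-3 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                <p class="mb-2"><i class="fa fa-code text-gray-500 mr-1"></i>方法示意（假设性草图，非论文官方实现）：下面的 numpy 代码按摘要思路演示“由邻接矩阵构造一阶/二阶/全局三级注意力掩码，并用节点级路由权重融合多层次交互”；掩码层级、路由器形式与维度均为示例假设。</p>
                <pre class="overflow-x-auto text-xs bg-white border border-gray-200 rounded p-2"><code class="language-python"># 层次化注意力掩码 + 简化的节点级专家路由（numpy 示例）
import numpy as np

def softmax(x, axis=-1):
    x = x - x.max(axis=axis, keepdims=True)
    e = np.exp(x)
    return e / e.sum(axis=axis, keepdims=True)

def masked_attention(q, k, v, mask):
    # mask: [n, n]，1 表示允许交互，0 表示屏蔽；掩码很稀疏时可只计算非零位置（即“稀疏模式”）
    scores = q @ k.T / np.sqrt(q.shape[-1])
    scores = np.where(mask.astype(bool), scores, -1e9)
    return softmax(scores) @ v

def hierarchical_masks(adj):
    # 由邻接矩阵构造三个层次的掩码：一阶邻居、二阶可达、全局
    n = adj.shape[0]
    local = np.clip(adj + np.eye(n), 0, 1)
    two_hop = np.clip(local @ local, 0, 1)
    return [local, two_hop, np.ones((n, n))]

rng = np.random.default_rng(0)
n, d = 6, 8
adj = rng.integers(0, 2, size=(n, n)).astype(float)               # 随机图的邻接矩阵
x = rng.standard_normal((n, d))                                    # 节点特征
masks = hierarchical_masks(adj)
outputs = np.stack([masked_attention(x, x, x, m) for m in masks])  # [3, n, d]

# 节点级路由：为每个节点给三个层次的注意力输出分配融合权重
router_w = rng.standard_normal((d, 3))
gate = softmax(x @ router_w, axis=-1)                              # [n, 3]
fused = np.einsum("ne,end->nd", gate, outputs)                     # 自适应整合多层次交互
print(fused.shape)
</code></pre>
            </div>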
    </div>
</div>
<div class="paper-card p-4 expanded">
    <div class="flex justify-between items-start mb-2">
        <h3 class="text-lg font-semibold text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.18358v1" target="_blank" rel="noopener noreferrer">
                <i class="fa fa-star text-yellow-400 mr-1"></i>集成剪枝注意力头以实现不确定性感知的高效Transformer
            </a>
        </h3>
        <span class="score-badge bg-green-100 text-green-800">
            <i class="fa fa-star mr-1"></i>8/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            Ensembling Pruned Attention Heads For Uncertainty-Aware Efficient Transformers
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Firas Gabetni, Giuseppe Curci, Andrea Pilzer, Subhankar Roy, Elisa Ricci, Gianni...
        </div>
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-lightbulb-o text-yellow-500 mr-1"></i>核心总结:</strong>
            <p class="text-gray-600 text-sm mt-1">论文研究如何在保持不确定性量化性能的同时提高Transformer模型的推理效率。核心方法是剪枝注意力头创建多样化集成成员，并通过分组全连接层合并，构建紧凑高效的集成模型。</p>
        </div>
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文提出通过剪枝注意力头构建高效Transformer集成的方法，直接改进Transformer架构效率，对推荐和搜索系统的高效部署具有重要价值。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-10-21 07:26:38
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2510.18358v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2510.18358v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.LG</span><span class="category-tag">cs.CV</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Uncertainty quantification (UQ) is essential for deploying deep neural networks in safety-critical settings. Although methods like Deep Ensembles achieve strong UQ performance, their high computational and memory costs hinder scalability to large models. We introduce Hydra Ensembles, an efficient transformer-based ensemble that prunes attention heads to create diverse members and merges them via a new multi-head attention with grouped fully-connected layers. This yields a compact model with inference speed close to a single network, matching or surpassing Deep Ensembles in UQ performance without retraining from scratch. We also provide an in-depth analysis of pruning, showing that naive approaches can harm calibration, whereas Hydra Ensembles preserves robust uncertainty. Experiments on image and text classification tasks, with various architectures, show consistent gains over Deep Ensembles. Remarkably, in zero-shot classification on ImageNet-1k, our approach surpasses state of the art methods, even without requiring additional training.
                </div>
            </details>
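            <div class="mt-3 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                <p class="mb-2"><i class="fa fa-code text-gray-500 mr-1"></i>方法示意（假设性草图，非论文官方实现）：以下 PyTorch 代码按摘要思路演示“对同一注意力骨干按头剪枝得到多个集成成员，再用分组全连接层合并输出”；TinyMHA、成员划分与分组方式均为示例假设。</p>
                <pre class="overflow-x-auto text-xs bg-white border border-gray-200 rounded p-2"><code class="language-python"># 按头剪枝构造集成成员 + 分组全连接合并（PyTorch 示例）
import torch
import torch.nn as nn

class TinyMHA(nn.Module):
    """极简多头注意力，便于演示按头剪枝。"""
    def __init__(self, dim=32, heads=4):
        super().__init__()
        self.heads, self.dh = heads, dim // heads
        self.qkv = nn.Linear(dim, 3 * dim)
        self.out = nn.Linear(dim, dim)

    def forward(self, x, keep_heads=None):
        b, n, d = x.shape
        q, k, v = self.qkv(x).chunk(3, dim=-1)
        def split(t):
            return t.view(b, n, self.heads, self.dh).transpose(1, 2)
        q, k, v = split(q), split(k), split(v)
        att = torch.softmax(q @ k.transpose(-2, -1) / self.dh ** 0.5, dim=-1)
        out = att @ v                                # [b, heads, n, dh]
        if keep_heads is not None:
            mask = torch.zeros(self.heads)
            mask[list(keep_heads)] = 1.0
            out = out * mask.view(1, -1, 1, 1)       # 将被剪掉的头置零
        out = out.transpose(1, 2).reshape(b, n, d)
        return self.out(out)

# 三个集成成员 = 同一骨干 + 不同的保留头子集（多样性来源）
members = [(0, 1), (1, 2), (2, 3)]
mha = TinyMHA()
x = torch.randn(2, 5, 32)
branch_outs = torch.cat([mha(x, keep) for keep in members], dim=-1)   # [b, n, 96]

# 分组全连接层合并：每个成员的通道只在组内混合，推理开销接近单个网络
grouped_fc = nn.Conv1d(96, 96, kernel_size=1, groups=len(members))
merged = grouped_fc(branch_outs.transpose(1, 2)).transpose(1, 2)
print(merged.shape)
</code></pre>
            </div>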
    </div>
</div>
<div class="paper-card p-4 expanded">
    <div class="flex justify-between items-start mb-2">
        <h3 class="text-lg font-semibold text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.18277v1" target="_blank" rel="noopener noreferrer">
                <i class="fa fa-star text-yellow-400 mr-1"></i>基于AI增强酒店推荐：LLM驱动的评论摘要与查询导向洞察
            </a>
        </h3>
        <span class="score-badge bg-green-100 text-green-800">
            <i class="fa fa-star mr-1"></i>7/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            Enhancing Hotel Recommendations with AI: LLM-Based Review Summarization and Query-Driven Insights
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Nikolaos Belibasakis, Anastasios Giannaros, Ioanna Giannoukou, Spyros Sioutas
        </div>
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-lightbulb-o text-yellow-500 mr-1"></i>核心总结:</strong>
            <p class="text-gray-600 text-sm mt-1">论文研究如何解决用户在海量酒店评论中高效获取信息的难题，核心方法是利用LLM自动总结评论并支持用户查询特定属性方面，提供个性化洞察。</p>
        </div>
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文直接应用LLM技术于推荐系统领域，通过文本摘要和查询驱动洞察增强酒店推荐，属于直接LLM应用范畴。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-10-21 04:02:51
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2510.18277v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2510.18277v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.IR</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    The increasing number of data a booking platform such as Booking.com and AirBnB offers make it challenging for interested parties to browse through the available accommodations and analyze reviews in an efficient way. Efforts have been made from the booking platform providers to utilize recommender systems in an effort to enable the user to filter the results by factors such as stars, amenities, cost but most valuable insights can be provided by the unstructured text-based reviews. Going through these reviews one-by-one requires a substantial amount of time to be devoted while a respectable percentage of the reviews won't provide to the user what they are actually looking for. This research publication explores how Large Language Models (LLMs) can enhance short rental apartments recommendations by summarizing and mining key insights from user reviews. The web application presented in this paper, named "instaGuide", automates the procedure of isolating the text-based user reviews from a property on the Booking.com platform, synthesizing the summary of the reviews, and enabling the user to query specific aspects of the property in an effort to gain feedback on their personal questions/criteria. During the development of the instaGuide tool, numerous LLM models were evaluated based on accuracy, cost, and response quality. The results suggest that the LLM-powered summarization reduces significantly the amount of time the users need to devote on their search for the right short rental apartment, improving the overall decision-making procedure.
                </div>
            </details>
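            <div class="mt-3 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                <p class="mb-2"><i class="fa fa-code text-gray-500 mr-1"></i>流程示意（假设性草图，非 instaGuide 实际实现）：下面的 Python 代码勾勒“评论摘要 + 按用户问题查询洞察”两步流程；call_llm 为假设的占位函数，提示词为示例写法。</p>
                <pre class="overflow-x-auto text-xs bg-white border border-gray-200 rounded p-2"><code class="language-python"># 评论摘要与查询驱动洞察的流程草图
def call_llm(prompt):
    """占位函数：实际系统中替换为所选 LLM 的推理接口。"""
    return "[LLM 输出占位]"

def summarize_reviews(reviews, max_reviews=50):
    joined = "\n".join(f"- {r}" for r in reviews[:max_reviews])
    prompt = (
        "请阅读以下住宿评论，总结整体优缺点，并按清洁度、位置、性价比分类：\n" + joined
    )
    return call_llm(prompt)

def answer_query(reviews, question, max_reviews=50):
    joined = "\n".join(f"- {r}" for r in reviews[:max_reviews])
    prompt = (
        f"仅依据下列评论回答用户问题：{question}\n"
        "若评论中没有相关信息，请明确说明。\n" + joined
    )
    return call_llm(prompt)

reviews = ["位置很好，离地铁 5 分钟。", "隔音一般，但房间很干净。"]
print(summarize_reviews(reviews))
print(answer_query(reviews, "这家适合带孩子入住吗？"))
</code></pre>
            </div>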
    </div>
</div>
<div class="paper-card p-4 expanded">
    <div class="flex justify-between items-start mb-2">
        <h3 class="text-lg font-semibold text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.18745v1" target="_blank" rel="noopener noreferrer">
                <i class="fa fa-star text-yellow-400 mr-1"></i>Topoformer：通过空间查询与重加权在Transformer语言模型中实现类脑地形组织
            </a>
        </h3>
        <span class="score-badge bg-green-100 text-green-800">
            <i class="fa fa-star mr-1"></i>7/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            Topoformer: brain-like topographic organization in Transformer language models through spatial querying and reweighting
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Taha Binhuraib, Greta Tuckute, Nicholas Blauch
        </div>
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-lightbulb-o text-yellow-500 mr-1"></i>核心总结:</strong>
            <p class="text-gray-600 text-sm mt-1">研究Transformer模型缺乏空间偏置的组织问题；核心方法是通过空间查询（二维网格排列键值对）和空间重加权（局部连接层替代全连接层），在Transformer中实现类似大脑的地形组织。</p>
        </div>
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文提出的空间查询和重加权机制属于Transformer架构创新，直接提升模型可解释性，对搜索推荐系统的透明化有重要价值。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-10-21 15:54:57
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2510.18745v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2510.18745v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CL</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Spatial functional organization is a hallmark of biological brains: neurons are arranged topographically according to their response properties, at multiple scales. In contrast, representations within most machine learning models lack spatial biases, instead manifesting as disorganized vector spaces that are difficult to visualize and interpret. Here, we propose a novel form of self-attention that turns Transformers into "Topoformers" with topographic organization. We introduce spatial querying - where keys and queries are arranged on 2D grids, and local pools of queries are associated with a given key - and spatial reweighting, where we convert the standard fully connected layer of self-attention into a locally connected layer. We first demonstrate the feasibility of our approach by training a 1-layer Topoformer on a sentiment classification task. Training with spatial querying encourages topographic organization in the queries and keys, and spatial reweighting separately encourages topographic organization in the values and self-attention outputs. We then apply the Topoformer motifs at scale, training a BERT architecture with a masked language modeling objective. We find that the topographic variant performs on par with a non-topographic control model on NLP benchmarks, yet produces interpretable topographic organization as evaluated via eight linguistic test suites. Finally, analyzing an fMRI dataset of human brain responses to a large set of naturalistic sentences, we demonstrate alignment between low-dimensional topographic variability in the Topoformer model and human brain language network. Scaling up Topoformers further holds promise for greater interpretability in NLP research, and for more accurate models of the organization of linguistic information in the human brain.
                </div>
            </details>
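            <div class="mt-3 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                <p class="mb-2"><i class="fa fa-code text-gray-500 mr-1"></i>方法示意（假设性草图，非论文官方实现）：以下 numpy 代码把隐藏单元排成 g x g 网格，用网格邻域掩码约束查询/键投影与输出层的连接，粗略对应“空间查询”与“空间重加权”两个构件；网格大小、半径与掩码施加方式均为示例假设。</p>
                <pre class="overflow-x-auto text-xs bg-white border border-gray-200 rounded p-2"><code class="language-python"># 用网格邻域掩码得到局部连接的投影与输出层（numpy 示例）
import numpy as np

def grid_neighbor_mask(g, radius=1):
    # 返回 [g*g, g*g] 的 0/1 掩码，仅保留网格上切比雪夫距离不超过 radius 的单元对
    coords = np.array([(i, j) for i in range(g) for j in range(g)])
    diff = np.abs(coords[:, None, :] - coords[None, :, :]).max(-1)
    return np.less_equal(diff, radius).astype(float)

rng = np.random.default_rng(0)
g = 4                        # 网格边长，隐藏单元数 d = g*g
d, n = g * g, 6              # n 为序列长度
x = rng.standard_normal((n, d))
mask = grid_neighbor_mask(g, radius=1)

# “空间查询”的简化：查询/键投影被限制为网格上的局部连接
Wq = rng.standard_normal((d, d)) * mask
Wk = rng.standard_normal((d, d)) * mask
q, k, v = x @ Wq, x @ Wk, x
att = q @ k.T / np.sqrt(d)
att = np.exp(att - att.max(-1, keepdims=True))
att = att / att.sum(-1, keepdims=True)
out = att @ v

# “空间重加权”：用局部连接层替代自注意力中的全连接输出层
Wo = rng.standard_normal((d, d)) * mask
y = out @ Wo
print(y.shape)   # 空间约束促使相邻单元产生相似的响应，从而形成地形组织
</code></pre>
            </div>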
    </div>
</div>
<div class="paper-card p-4 expanded">
    <div class="flex justify-between items-start mb-2">
        <h3 class="text-lg font-semibold text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.18684v1" target="_blank" rel="noopener noreferrer">
                <i class="fa fa-star text-yellow-400 mr-1"></i>MLMA：基于Mamba架构的多语言模型探索
            </a>
        </h3>
        <span class="score-badge bg-green-100 text-green-800">
            <i class="fa fa-star mr-1"></i>7/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            MLMA: Towards Multilingual with Mamba Based Architectures
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Mohamed Nabih Ali, Daniele Falavigna, Alessio Brutti
        </div>
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-lightbulb-o text-yellow-500 mr-1"></i>核心总结:</strong>
            <p class="text-gray-600 text-sm mt-1">研究多语言语音识别中平衡高低资源语言性能的问题；核心方法是采用Mamba状态空间模型架构替代Transformer，通过语言感知条件和共享表示实现高效长序列处理。</p>
        </div>
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文探索Mamba架构作为Transformer替代方案，在序列建模效率方面具有直接相关性，但应用领域是语音识别而非核心推荐搜索场景。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-10-21 14:44:16
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2510.18684v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2510.18684v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CL</span><span class="category-tag">cs.SD</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Multilingual automatic speech recognition (ASR) remains a challenging task, especially when balancing performance across high- and low-resource languages. Recent advances in sequence modeling suggest that architectures beyond Transformers may offer better scalability and efficiency. In this work, we introduce MLMA (Multilingual Language Modeling with Mamba for ASR), a new approach that leverages the Mamba architecture--an efficient state-space model optimized for long-context sequence processing--for multilingual ASR. Using Mamba, MLMA implicitly incorporates language-aware conditioning and shared representations to support robust recognition across diverse languages. Experiments on standard multilingual benchmarks show that MLMA achieves competitive performance compared to Transformer-based architectures. These results highlight Mamba's potential as a strong backbone for scalable, efficient, and accurate multilingual speech recognition.
                </div>
            </details>
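            <div class="mt-3 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                <p class="mb-2"><i class="fa fa-code text-gray-500 mr-1"></i>原理示意（假设性草图，非 MLMA 官方实现）：下面的 numpy 代码写出一个对角状态空间模型（SSM）的逐步递推，说明 Mamba 这类架构以线性复杂度处理长序列的基本思路；维度与参数均为示例假设，语言感知条件等细节从略。</p>
                <pre class="overflow-x-auto text-xs bg-white border border-gray-200 rounded p-2"><code class="language-python"># 对角状态空间模型的逐步递推（numpy 示例）
import numpy as np

def ssm_scan(x, A, B, C):
    """h_t = A * h_{t-1} + B @ x_t,  y_t = C @ h_t（A 取对角，逐元素相乘）"""
    T, _ = x.shape
    h = np.zeros(A.shape[0])
    ys = []
    for t in range(T):
        h = A * h + B @ x[t]
        ys.append(C @ h)
    return np.stack(ys)

rng = np.random.default_rng(0)
T, d_in, d_state, d_out = 100, 16, 32, 16
x = rng.standard_normal((T, d_in))                 # 实际中可在输入前拼接语言嵌入作为条件
A = np.full(d_state, 0.95)                          # 对角状态转移（带衰减的记忆）
B = rng.standard_normal((d_state, d_in)) * 0.1
C = rng.standard_normal((d_out, d_state)) * 0.1
y = ssm_scan(x, A, B, C)
print(y.shape)   # 每步计算量与序列长度无关，整体复杂度为 O(T)
</code></pre>
            </div>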
    </div>
</div>
<div class="paper-card p-4 expanded">
    <div class="flex justify-between items-start mb-2">
        <h3 class="text-lg font-semibold text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.18462v1" target="_blank" rel="noopener noreferrer">
                <i class="fa fa-star text-yellow-400 mr-1"></i>DePass：通过简单分解前向传递的统一特征归因方法
            </a>
        </h3>
        <span class="score-badge bg-green-100 text-green-800">
            <i class="fa fa-star mr-1"></i>7/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            DePass: Unified Feature Attributing by Simple Decomposed Forward Pass
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Xiangyu Hong, Che Jiang, Kai Tian, Biqing Qi, Youbang Sun, Ning Ding, Bowen Zhou
        </div>
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-lightbulb-o text-yellow-500 mr-1"></i>核心总结:</strong>
            <p class="text-gray-600 text-sm mt-1">研究Transformer模型内部计算行为的特征归因问题；核心方法是通过单次分解前向传播，将隐藏状态分解为自定义加性分量并固定注意力分数和MLP激活进行传播，实现无需辅助训练的细粒度归因。</p>
        </div>
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文提出的Transformer内部特征归因方法DePass，对于理解推荐/搜索系统中模型决策机制具有直接应用价值，属于模型可解释性核心技术。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-10-21 09:36:12
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2510.18462v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2510.18462v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CL</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Attributing the behavior of Transformer models to internal computations is a central challenge in mechanistic interpretability. We introduce DePass, a unified framework for feature attribution based on a single decomposed forward pass. DePass decomposes hidden states into customized additive components, then propagates them with attention scores and MLP's activations fixed. It achieves faithful, fine-grained attribution without requiring auxiliary training. We validate DePass across token-level, model component-level, and subspace-level attribution tasks, demonstrating its effectiveness and fidelity. Our experiments highlight its potential to attribute information flow between arbitrary components of a Transformer model. We hope DePass serves as a foundational tool for broader applications in interpretability.
                </div>
            </details>
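            <div class="mt-3 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                <p class="mb-2"><i class="fa fa-code text-gray-500 mr-1"></i>性质示意（假设性草图，非论文官方实现）：以下 numpy 代码复现“分解前向传播”的核心性质：固定原始前向得到的注意力分数与 MLP 激活掩码后，输入的加性分量可分别传播且其和恢复原输出；网络结构与分量划分均为示例假设。</p>
                <pre class="overflow-x-auto text-xs bg-white border border-gray-200 rounded p-2"><code class="language-python"># 固定注意力分数与激活掩码后的分解传播（numpy 示例）
import numpy as np

rng = np.random.default_rng(0)
n, d, d_ff = 4, 8, 16
x = rng.standard_normal((n, d))
Wq, Wk, Wv = (rng.standard_normal((d, d)) for _ in range(3))
W1, W2 = rng.standard_normal((d, d_ff)), rng.standard_normal((d_ff, d))

def softmax(z):
    z = z - z.max(-1, keepdims=True)
    e = np.exp(z)
    return e / e.sum(-1, keepdims=True)

# 1) 原始前向：记录注意力分数 att 与 ReLU 的 0/1 激活掩码 gate
att = softmax((x @ Wq) @ (x @ Wk).T / np.sqrt(d))
attn_out = att @ (x @ Wv)
pre = attn_out @ W1
gate = np.heaviside(pre, 0.0)
y_full = (pre * gate) @ W2

# 2) 分解传播：把输入拆成两个加性分量，固定 att 与 gate 后分别传播
parts = [x * 0.3, x * 0.7]   # 示例分量；实际可按 token、模型组件或子空间自定义
y_parts = [((att @ (p @ Wv)) @ W1 * gate) @ W2 for p in parts]

print(np.allclose(sum(y_parts), y_full))   # True：分量之和严格恢复原输出
</code></pre>
            </div>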
    </div>
</div>
<div class="paper-card p-4 expanded">
    <div class="flex justify-between items-start mb-2">
        <h3 class="text-lg font-semibold text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.18279v1" target="_blank" rel="noopener noreferrer">
                <i class="fa fa-star text-yellow-400 mr-1"></i>文本还是像素？各需一半：论多模态大语言模型中视觉文本输入的标记效率
            </a>
        </h3>
        <span class="score-badge bg-green-100 text-green-800">
            <i class="fa fa-star mr-1"></i>7/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            Text or Pixels? It Takes Half: On the Token Efficiency of Visual Text Inputs in Multimodal LLMs
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Yanhong Li, Zixuan Lan, Jiawei Zhou
        </div>
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-lightbulb-o text-yellow-500 mr-1"></i>核心总结:</strong>
            <p class="text-gray-600 text-sm mt-1">研究如何通过将长文本渲染为单张图像来压缩多模态LLM的输入；核心方法是利用视觉文本表示作为输入压缩手段，显著减少解码器token数量。</p>
        </div>
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文探索视觉文本输入作为输入压缩方法，直接涉及LLM效率提升技术，对搜索和推荐系统中的长文本处理具有应用价值。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-10-21 04:07:20
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2510.18279v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2510.18279v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CL</span><span class="category-tag">cs.AI</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Large language models (LLMs) and their multimodal variants can now process visual inputs, including images of text. This raises an intriguing question: can we compress textual inputs by feeding them as images to reduce token usage while preserving performance? In this paper, we show that visual text representations are a practical and surprisingly effective form of input compression for decoder LLMs. We exploit the idea of rendering long text inputs as a single image and provide it directly to the model. This leads to dramatically reduced number of decoder tokens required, offering a new form of input compression. Through experiments on two distinct benchmarks RULER (long-context retrieval) and CNN/DailyMail (document summarization) we demonstrate that this text-as-image method yields substantial token savings (often nearly half) without degrading task performance.
                </div>
            </details>
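            <div class="mt-3 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                <p class="mb-2"><i class="fa fa-code text-gray-500 mr-1"></i>方法示意（假设性草图）：下面给出用 PIL 把长文本渲染成单张图片的可运行代码，对应“文本作为图像输入”的压缩思路；版式参数为示例假设，后续的多模态模型调用因模型而异，此处省略。</p>
                <pre class="overflow-x-auto text-xs bg-white border border-gray-200 rounded p-2"><code class="language-python"># 把长文本渲染成图片（PIL 示例）
from PIL import Image, ImageDraw, ImageFont
import textwrap

def render_text_as_image(text, width=800, line_height=20, margin=10, chars_per_line=90):
    font = ImageFont.load_default()                 # 实际可换成指定字号的 truetype 字体
    lines = textwrap.wrap(text, width=chars_per_line)
    height = margin * 2 + line_height * max(1, len(lines))
    img = Image.new("RGB", (width, height), "white")
    draw = ImageDraw.Draw(img)
    for i, line in enumerate(lines):
        draw.text((margin, margin + i * line_height), line, fill="black", font=font)
    return img

long_context = "Paragraph one of a very long document. " * 40
render_text_as_image(long_context).save("context.png")
# 随后将 context.png 作为图像输入多模态 LLM：所需图像 token 数通常远少于原文本 token 数
</code></pre>
            </div>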
    </div>
</div>
<div class="paper-card p-4 expanded">
    <div class="flex justify-between items-start mb-2">
        <h3 class="text-lg font-semibold text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.18431v1" target="_blank" rel="noopener noreferrer">
                <i class="fa fa-star text-yellow-400 mr-1"></i>ScaleNet：通过增量参数扩展预训练神经网络
            </a>
        </h3>
        <span class="score-badge bg-green-100 text-green-800">
            <i class="fa fa-star mr-1"></i>7/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            ScaleNet: Scaling up Pretrained Neural Networks with Incremental Parameters
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Zhiwei Hao, Jianyuan Guo, Li Shen, Kai Han, Yehui Tang, Han Hu, Yunhe Wang
        </div>
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-lightbulb-o text-yellow-500 mr-1"></i>核心总结:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文研究如何高效扩展预训练视觉Transformer模型规模的问题，核心方法是通过在预训练模型中插入共享权重的新层并添加少量调整参数来实现参数高效的模型扩展。</p>
        </div>
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文提出的增量参数扩展方法直接适用于Transformer架构的效率优化，对推荐系统的大规模模型部署具有重要参考价值。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-10-21 09:07:25
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2510.18431v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2510.18431v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CV</span><span class="category-tag">cs.AI</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Recent advancements in vision transformers (ViTs) have demonstrated that larger models often achieve superior performance. However, training these models remains computationally intensive and costly. To address this challenge, we introduce ScaleNet, an efficient approach for scaling ViT models. Unlike conventional training from scratch, ScaleNet facilitates rapid model expansion with negligible increases in parameters, building on existing pretrained models. This offers a cost-effective solution for scaling up ViTs. Specifically, ScaleNet achieves model expansion by inserting additional layers into pretrained ViTs, utilizing layer-wise weight sharing to maintain parameters efficiency. Each added layer shares its parameter tensor with a corresponding layer from the pretrained model. To mitigate potential performance degradation due to shared weights, ScaleNet introduces a small set of adjustment parameters for each layer. These adjustment parameters are implemented through parallel adapter modules, ensuring that each instance of the shared parameter tensor remains distinct and optimized for its specific function. Experiments on the ImageNet-1K dataset demonstrate that ScaleNet enables efficient expansion of ViT models. With a 2$\times$ depth-scaled DeiT-Base model, ScaleNet achieves a 7.42% accuracy improvement over training from scratch while requiring only one-third of the training epochs, highlighting its efficiency in scaling ViTs. Beyond image classification, our method shows significant potential for application in downstream vision areas, as evidenced by the validation in object detection task.
                </div>
            </details>
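            <div class="mt-3 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                <p class="mb-2"><i class="fa fa-code text-gray-500 mr-1"></i>方法示意（假设性草图，非论文官方实现）：以下 PyTorch 代码演示“在预训练层之间插入共享其参数张量的新层，并为每个实例附加小的并行 adapter”；此处用线性层充当预训练 block，adapter 的瓶颈维度等均为示例假设。</p>
                <pre class="overflow-x-auto text-xs bg-white border border-gray-200 rounded p-2"><code class="language-python"># 共享权重的深度扩展 + 并行 adapter（PyTorch 示例）
import torch
import torch.nn as nn

class SharedBlockWithAdapter(nn.Module):
    """复用同一个预训练 block 的参数张量，仅新增一个低维并行 adapter。"""
    def __init__(self, shared_block, dim, bottleneck=8):
        super().__init__()
        self.shared = shared_block                   # 与原层共享同一参数张量
        self.adapter = nn.Sequential(
            nn.Linear(dim, bottleneck), nn.GELU(), nn.Linear(bottleneck, dim)
        )

    def forward(self, x):
        return self.shared(x) + self.adapter(x)      # 并行 adapter 使各共享实例可分别调整

dim = 32
pretrained = nn.ModuleList([nn.Linear(dim, dim) for _ in range(4)])   # 充当“预训练层”

# 2x 深度扩展：每个预训练层后插入一个与其共享权重的新层
expanded = []
for layer in pretrained:
    expanded.append(layer)
    expanded.append(SharedBlockWithAdapter(layer, dim))
model = nn.Sequential(*expanded)

x = torch.randn(2, dim)
print(model(x).shape)
# 训练时通常只更新各 adapter 等少量新增参数，新增参数量因此可以忽略不计
</code></pre>
            </div>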
    </div>
</div>
<div class="paper-card p-4 expanded">
    <div class="flex justify-between items-start mb-2">
        <h3 class="text-lg font-semibold text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.18723v1" target="_blank" rel="noopener noreferrer">
                <i class="fa fa-star text-yellow-400 mr-1"></i>贝叶斯低秩分解用于鲁棒模型自适应
            </a>
        </h3>
        <span class="score-badge bg-green-100 text-green-800">
            <i class="fa fa-star mr-1"></i>6/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            Bayesian Low-Rank Factorization for Robust Model Adaptation
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Enes Yavuz Ugan, Ngoc-Quan Pham, Alexander Waibel
        </div>
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-lightbulb-o text-yellow-500 mr-1"></i>核心总结:</strong>
            <p class="text-gray-600 text-sm mt-1">论文研究语音基础模型在特定领域（如语码转换）微调时的过拟合和灾难性遗忘问题，核心方法是引入贝叶斯先验的低秩适配器，通过稀疏化适配矩阵来平衡新领域适应与基础模型能力保留。</p>
        </div>
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文提出的贝叶斯低秩适配方法虽然针对语音模型，但其核心思想——通过稀疏化适配矩阵来平衡领域适应与基础能力保留——可直接迁移到推荐系统中的模型微调场景，避免灾难性遗忘。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-10-21 15:23:30
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2510.18723v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2510.18723v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CL</span><span class="category-tag">cs.LG</span><span class="category-tag">cs.SD</span><span class="category-tag">eess.AS</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Large speech foundation models achieve strong performance across many domains, but they often require adaptation to handle local needs such as code-switching, where speakers mix languages within the same utterance. Direct fine-tuning of these models risks overfitting to the target domain and overwriting the broad capabilities of the base model. To address this challenge, we explore Bayesian factorized adapters for speech foundation models, which place priors near zero to achieve sparser adaptation matrices and thereby retain general performance while adapting to specific domains. We apply our approach to the Whisper model and evaluate on different multilingual code-switching scenarios. Our results show only minimal adaptation loss while significantly reducing catastrophic forgetting of the base model. Compared to LoRA, our method achieves a backward gain of 54% with only a 4% drop on the new domain. These findings highlight the effectiveness of Bayesian adaptation for fine-tuning speech foundation models without sacrificing generalization.
                </div>
            </details>
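            <div class="mt-3 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                <p class="mb-2"><i class="fa fa-code text-gray-500 mr-1"></i>方法示意（假设性草图，非论文官方实现）：以下 PyTorch 代码从 MAP 视角把“近零先验”写成对低秩适配矩阵的 L1 正则，演示“适配新领域的同时促使适配矩阵稀疏化、减轻灾难性遗忘”的思路；先验形式与超参数均为示例假设。</p>
                <pre class="overflow-x-auto text-xs bg-white border border-gray-200 rounded p-2"><code class="language-python"># 带近零先验的低秩适配器（PyTorch 示例）
import torch
import torch.nn as nn

class LoRALinear(nn.Module):
    def __init__(self, base, r=4):
        super().__init__()
        self.base = base
        for p in self.base.parameters():
            p.requires_grad = False                  # 冻结基础模型权重
        self.A = nn.Parameter(torch.zeros(r, base.in_features))
        self.B = nn.Parameter(torch.zeros(base.out_features, r))
        nn.init.normal_(self.A, std=0.01)

    def forward(self, x):
        return self.base(x) + x @ self.A.t() @ self.B.t()

    def prior_penalty(self, scale=1e-2):
        # 均值为零的 Laplace 先验对应 L1 正则：促使适配矩阵稀疏
        return scale * (self.A.abs().sum() + self.B.abs().sum())

layer = LoRALinear(nn.Linear(16, 16))
opt = torch.optim.Adam([p for p in layer.parameters() if p.requires_grad], lr=1e-3)
x, target = torch.randn(8, 16), torch.randn(8, 16)
for _ in range(3):
    loss = nn.functional.mse_loss(layer(x), target) + layer.prior_penalty()
    opt.zero_grad(); loss.backward(); opt.step()
print(float(loss))
</code></pre>
            </div>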
    </div>
</div>
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.18795v1" target="_blank" rel="noopener noreferrer">
                ProCLIP：基于大语言模型嵌入器的渐进式视觉-语言对齐
            </a>
        </h3>
        <span class="score-badge bg-blue-100 text-blue-800">
            <i class="fa fa-star mr-1"></i>4/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            ProCLIP: Progressive Vision-Language Alignment via LLM-based Embedder
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Xiaoxing Hu, Kaicheng Yang, Ziyong Feng, Qi Ming, Zonghao Guo, Xiang An, Ziyong ...
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文提出了一种渐进式视觉-语言对齐方法，属于VLM（视觉语言模型）技术范畴。虽然VLM技术本身具有启发意义，可以类比处理推荐系统中的异构数据模态，但该论文主要聚焦于视觉-语言对齐这一特定任务，与推荐/搜索/广告系统的直接关联性较弱，潜在应用场景不够明确。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-10-21 16:48:49
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2510.18795v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2510.18795v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    The original CLIP text encoder is limited by a maximum input length of 77 tokens, which hampers its ability to effectively process long texts and perform fine-grained semantic understanding. In addition, the CLIP text encoder lacks support for multilingual inputs. All these limitations significantly restrict its applicability across a broader range of tasks. Recent studies have attempted to replace the CLIP text encoder with an LLM-based embedder to enhance its ability in processing long texts, multilingual understanding, and fine-grained semantic comprehension. However, because the representation spaces of LLMs and the vision-language space of CLIP are pretrained independently without alignment priors, direct alignment using contrastive learning can disrupt the intrinsic vision-language alignment in the CLIP image encoder, leading to an underutilization of the knowledge acquired during pre-training. To address this challenge, we propose ProCLIP, a curriculum learning-based progressive vision-language alignment framework to effectively align the CLIP image encoder with an LLM-based embedder. Specifically, ProCLIP first distills knowledge from CLIP's text encoder into the LLM-based embedder to leverage CLIP's rich pretrained knowledge while establishing initial alignment between the LLM embedder and CLIP image encoder. Subsequently, ProCLIP further aligns the CLIP image encoder with the LLM-based embedder through image-text contrastive tuning, employing self-distillation regularization to avoid overfitting. To achieve a more effective alignment, instance semantic alignment loss and embedding structure alignment loss are employed during representation inheritance and contrastive tuning. The Code is available at https://github.com/VisionXLab/ProCLIP
                </div>
            </details>
    </div>
</div>
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.18725v1" target="_blank" rel="noopener noreferrer">
                SemiAdapt与SemiLoRA：基于Transformer的低资源语言翻译高效领域自适应——以爱尔兰语为例研究
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>3/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            SemiAdapt and SemiLoRA: Efficient Domain Adaptation for Transformer-based Low-Resource Language Translation with a Case Study on Irish
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Josh McGiff, Nikola S. Nikolov
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要关注低资源语言翻译的领域自适应技术，虽然涉及Transformer架构的效率优化，但其核心应用场景是机器翻译而非推荐系统、搜索或广告领域。SemiLoRA等技术可能对Transformer效率有改进，但在RecSys/Search/Ads中的潜在应用不够明确和直接。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-10-21 15:24:15
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2510.18725v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2510.18725v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CL</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Fine-tuning is widely used to tailor large language models for specific tasks such as neural machine translation (NMT). However, leveraging transfer learning is computationally expensive when fine-tuning large multilingual models with billions of parameters, thus creating a barrier to entry for researchers working on low-resource domains such as Irish translation. Parameter-efficient fine-tuning (PEFT) bridges this gap by training on a fraction of the original model parameters, with the Low-Rank Adaptation (LoRA) approach introducing small, trainable adapter layers. We introduce SemiAdapt and SemiLoRA as semi-supervised inference-efficient approaches that strengthen domain adaptation and lead to improved overall performance in NMT. We demonstrate that SemiAdapt can outperform full-domain fine-tuning, while most notably, SemiLoRA can propel PEFT methods to match or even outperform full-model fine-tuning. We further evaluate domain-by-dataset fine-tuning and demonstrate that our embedding-based inference methods perform especially well on larger and noisier corpora. All Irish translation models developed in this work are released as open resources. These methods aim to make high-quality domain adaptation and fine-tuning more accessible to researchers working with low-resource languages.
                </div>
            </details>
    </div>
</div>
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.18480v1" target="_blank" rel="noopener noreferrer">
                扩散语言模型的效率如何？对效率评估实践的批判性审视
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>3/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            How Efficient Are Diffusion Language Models? A Critical Examination of Efficiency Evaluation Practices
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Han Peng, Peiyu Liu, Zican Dong, Daixuan Cheng, Junyi Li, Yiru Tang, Shuo Wang, ...
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要关注扩散语言模型的效率评估实践，属于LLM效率分析范畴。虽然效率优化对推荐/搜索系统有潜在价值，但论文焦点是评估方法论而非具体的效率提升技术，且扩散模型在RecSys/Search/Ads中的直接应用尚不明确，因此相关性有限。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-10-21 10:00:32
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2510.18480v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2510.18480v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CL</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Diffusion language models (DLMs) have emerged as a promising alternative to the long-dominant autoregressive (AR) paradigm, offering a parallelable decoding process that could yield greater efficiency. Yet, in practice, current open-source DLMs often underperform their AR counterparts in speed, limiting their real-world utility. This work presents a systematic study of DLM efficiency, identifying key issues in prior evaluation methods. Through empirical benchmarking and a roofline-based theoretical analysis, we demonstrate that AR models generally achieve higher throughput, while DLMs consistently lag. We also investigate acceleration strategies, finding that techniques like dual cache and parallel decoding mainly offer gains at small batch sizes, with their benefits diminishing upon scaling. Our findings underscore the necessity of robust evaluation methods and improved acceleration strategies to advance research on DLMs.
                </div>
            </details>
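            <div class="mt-3 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                <p class="mb-2"><i class="fa fa-code text-gray-500 mr-1"></i>评测示意（假设性草图）：下面是一个最小的吞吐量（tokens/s）测量代码，体现比较 DLM 与自回归模型效率时需要固定批大小、生成长度并做预热；generate 为假设的占位函数，耗时用 sleep 模拟。</p>
                <pre class="overflow-x-auto text-xs bg-white border border-gray-200 rounded p-2"><code class="language-python"># 吞吐量测量草图（tokens/s）
import time

def generate(batch_size, gen_len):
    """占位函数：实际中替换为被测模型的解码调用。"""
    time.sleep(0.01)                       # 模拟一次解码耗时
    return batch_size * gen_len            # 返回本次生成的 token 数

def measure_throughput(batch_size=4, gen_len=128, warmup=2, iters=10):
    for _ in range(warmup):
        generate(batch_size, gen_len)      # 预热，排除初始化开销
    start = time.perf_counter()
    tokens = sum(generate(batch_size, gen_len) for _ in range(iters))
    return tokens / (time.perf_counter() - start)

for bs in (1, 4, 16):                      # 论文指出并行解码的收益往往随批大小增大而缩小
    print(bs, round(measure_throughput(batch_size=bs), 1))
</code></pre>
            </div>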
    </div>
</div>
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.18476v1" target="_blank" rel="noopener noreferrer">
                社交智能大语言模型代理中意图的概率建模
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>3/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            Probabilistic Modeling of Intentions in Socially Intelligent LLM Agents
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Feifan Xia, Yuyang Fang, Defang Li, Yantong Xie, Weikang Li, Yang Li, Deguo Xia,...
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要关注LLM代理的社交智能和意图建模，属于纯粹的LLM应用研究。虽然意图理解在推荐和搜索中有潜在价值，但论文聚焦于社交交互场景而非具体的推荐/搜索/广告应用，与当前关注的核心领域进展和直接应用关联度较低。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-10-21 09:54:44
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2510.18476v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2510.18476v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.AI</span><span class="category-tag">cs.CL</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    We present a probabilistic intent modeling framework for large language model (LLM) agents in multi-turn social dialogue. The framework maintains a belief distribution over a partner's latent intentions, initialized from contextual priors and dynamically updated through likelihood estimation after each utterance. The evolving distribution provides additional contextual grounding for the policy, enabling adaptive dialogue strategies under uncertainty. Preliminary experiments in the SOTOPIA environment show consistent improvements: the proposed framework increases the Overall score by 9.0% on SOTOPIA-All and 4.1% on SOTOPIA-Hard compared with the Qwen2.5-7B baseline, and slightly surpasses an oracle agent that directly observes partner intentions. These early results suggest that probabilistic intent modeling can contribute to the development of socially intelligent LLM agents.
                </div>
            </details>
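            <div class="mt-3 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                <p class="mb-2"><i class="fa fa-code text-gray-500 mr-1"></i>方法示意（假设性草图）：以下 Python 代码演示对对话伙伴潜在意图维护离散信念分布，并在每句话后做贝叶斯更新；意图集合与似然数值均为示例假设，实际系统中似然通常由 LLM 估计。</p>
                <pre class="overflow-x-auto text-xs bg-white border border-gray-200 rounded p-2"><code class="language-python"># 意图信念分布的贝叶斯更新（numpy 示例）
import numpy as np

intents = ["合作", "试探", "对抗"]
belief = np.array([0.5, 0.3, 0.2])            # 由上下文先验初始化

def update_belief(belief, likelihood):
    """P(intent | utterance) 正比于 P(utterance | intent) * P(intent)"""
    posterior = belief * likelihood
    return posterior / posterior.sum()

# 每轮对话后，用“该意图下产生这句话的可能性”更新信念
utterance_likelihoods = [
    np.array([0.7, 0.2, 0.1]),    # 第 1 句更像合作意图
    np.array([0.2, 0.5, 0.3]),    # 第 2 句偏向试探
]
for lik in utterance_likelihoods:
    belief = update_belief(belief, lik)
    print(dict(zip(intents, np.round(belief, 3))))
# 随后把 belief 作为额外上下文提供给回复策略，实现不确定性下的自适应对话
</code></pre>
            </div>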
    </div>
</div>
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.18434v1" target="_blank" rel="noopener noreferrer">
                概念思维链：在响应中引导智能体进行深度思考
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>3/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            Chain-of-Conceptual-Thought: Eliciting the Agent to Deeply Think within the Response
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Qingqing Gu, Dan Wang, Yue Zhao, Xiaoyu Wang, Zhonglin Jiang, Yong Chen, Hongyan...
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要关注LLM推理能力的提升，属于'Enabling LLM Tech'范畴。通过改进思维链方法增强模型深度思考能力，在搜索和推荐系统中可应用于更复杂的多步推理任务，如理解用户复杂意图或进行深度内容分析。但该技术属于通用推理增强，与推荐/搜索/广告领域的直接关联性较弱。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-10-21 09:08:21
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2510.18434v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2510.18434v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CL</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Chain-of-Thought (CoT) is widely applied to improve the LLM capability in math, coding and reasoning tasks. However, its performance is limited for open-domain tasks since there are no clearly defined reasoning steps or logical transitions. To mitigate such challenges, we propose another prompt-based paradigm called Chain of Conceptual Thought (CoCT), where the LLM first tags a concept, then generates the detailed content. The chain of concepts is allowed within the utterance, encouraging the LLM's deep and strategic thinking. We experiment with this paradigm in daily and emotional support conversations where the concept is comprised of emotions, strategies and topics. Automatic, human and model evaluations suggest that CoCT surpasses baselines such as Self-Refine, ECoT, ToT, SoT and RAG, suggesting a potential effective prompt-based paradigm of LLM for a wider scope of tasks.
                </div>
            </details>
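            <div class="mt-3 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                <p class="mb-2"><i class="fa fa-code text-gray-500 mr-1"></i>范式示意（假设性草图，非论文原文）：下面给出一个“先打概念标签、再展开内容”的提示模板，概念类别（情绪/策略/话题）取自摘要描述，具体提示词为示例改写。</p>
                <pre class="overflow-x-auto text-xs bg-white border border-gray-200 rounded p-2"><code class="language-python"># CoCT 式提示模板草图
COCT_PROMPT = """你是一个对话智能体。作答时请遵循“先标注概念、再展开内容”的格式，
同一条回复内可以串联多个概念标签，例如：

[情绪: 共情] 我能理解你现在的压力。
[策略: 提问] 方便说说最近是什么让你最焦虑吗？
[话题: 工作] 如果是工作节奏的问题，我们可以一起拆解一下。

现在请回复用户：{user_message}
"""

def build_coct_prompt(user_message):
    return COCT_PROMPT.format(user_message=user_message)

print(build_coct_prompt("最近总是睡不好，工作也提不起劲。"))
</code></pre>
            </div>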
    </div>
</div>
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.18383v1" target="_blank" rel="noopener noreferrer">
                MENTOR：一种通过教师优化奖励增强小模型能力的强化学习框架
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>3/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            MENTOR: A Reinforcement Learning Framework for Model Enhancement via Teacher-Optimized Rewards in Small Models
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>ChangSu Choi, Hoyun Song, Dongyeon Kim, WooHyeon Jung, Minkyung Cho, Sunjin Park...
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文虽然涉及强化学习框架，但主要聚焦于小模型的增强技术，而非直接应用于推荐系统、搜索或广告领域。虽然教师优化奖励机制在理论上可能用于改进推荐策略，但论文标题未明确表明与RecSys/Search/Ads的具体关联，且强化学习本身被列为次要关注点。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-10-21 08:03:14
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2510.18383v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2510.18383v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CL</span><span class="category-tag">cs.AI</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Distilling the tool-using capabilities of large language models (LLMs) into smaller, more efficient small language models (SLMs) is a key challenge for their practical application. The predominant approach, supervised fine-tuning (SFT), suffers from poor generalization as it trains models to imitate a static set of teacher trajectories rather than learn a robust methodology. While reinforcement learning (RL) offers an alternative, the standard RL using sparse rewards fails to effectively guide SLMs, causing them to struggle with inefficient exploration and adopt suboptimal strategies. To address these distinct challenges, we propose MENTOR, a framework that synergistically combines RL with teacher-guided distillation. Instead of simple imitation, MENTOR employs an RL-based process to learn a more generalizable policy through exploration. In addition, to solve the problem of reward sparsity, it uses a teacher's reference trajectory to construct a dense, composite teacher-guided reward that provides fine-grained guidance. Extensive experiments demonstrate that MENTOR significantly improves the cross-domain generalization and strategic competence of SLMs compared to both SFT and standard sparse-reward RL baselines.
                </div>
            </details>
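            <div class="mt-3 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                <p class="mb-2"><i class="fa fa-code text-gray-500 mr-1"></i>思路示意（假设性草图，非论文官方定义）：以下 Python 代码用教师参考轨迹构造逐步对齐的稠密引导信号，并与最终任务成败的稀疏奖励加权合成复合奖励；相似度函数与权重均为示例假设。</p>
                <pre class="overflow-x-auto text-xs bg-white border border-gray-200 rounded p-2"><code class="language-python"># 教师引导的稠密复合奖励草图
from difflib import SequenceMatcher

def step_similarity(student_step, teacher_step):
    # 用字符串相似度近似“学生动作与教师参考动作的对齐程度”
    return SequenceMatcher(None, student_step, teacher_step).ratio()

def dense_reward(student_traj, teacher_traj, task_success, w_guide=0.5, w_task=0.5):
    # 逐步与教师轨迹对齐得到稠密引导信号，再与最终任务成败（稀疏奖励）加权
    pairs = list(zip(student_traj, teacher_traj))
    guide = sum(step_similarity(s, t) for s, t in pairs) / max(len(pairs), 1)
    return w_guide * guide + w_task * float(task_success)

student = ["search(weather Beijing)", "answer(rainy)"]
teacher = ["search(weather in Beijing today)", "answer(light rain, 18C)"]
print(dense_reward(student, teacher, task_success=True))
</code></pre>
            </div>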
    </div>
</div>
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.18703v1" target="_blank" rel="noopener noreferrer">
                探索多模态网页文档上统一的视觉中心对比学习替代方案
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>3/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            Exploring a Unified Vision-Centric Contrastive Alternatives on Multi-Modal Web Documents
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Yiqi Lin, Alex Jinpeng Wang, Linjie Li, Zhengyuan Yang, Mike Zheng Shou
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文关注视觉中心的多模态对比学习，与推荐系统/搜索中的异构数据处理有一定类比潜力，但主要针对网页文档的视觉模态，缺乏明确的RecSys/Search/Ads应用场景说明。虽然VLM类比思想可能相关，但论文焦点过于偏向视觉分析，而非推荐/搜索的核心排序或用户建模问题。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-10-21 14:59:29
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2510.18703v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2510.18703v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     View full abstract <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Contrastive vision-language models such as CLIP have demonstrated strong performance across a wide range of multimodal tasks by learning from aligned image-text pairs. However, their ability to handle complex, real-world web documents remains limited, particularly in scenarios where text and images are interleaved, loosely aligned, or embedded in visual form. To address these challenges, we propose Vision-Centric Contrastive Learning (VC2L), a unified framework that models text, images, and their combinations using a single vision transformer. VC2L operates entirely in pixel space by rendering all inputs, whether textual, visual, or combined, as images, thus eliminating the need for OCR, text tokenization, or modality fusion strategy. To capture complex cross-modal relationships in multimodal web documents, VC2L employs a snippet-level contrastive learning objective that aligns consecutive multimodal segments, leveraging the inherent coherence of documents without requiring explicitly paired image-text data. To assess the effectiveness of this approach, we introduce three retrieval benchmarks, AnyCIR, SeqCIR, and CSR, designed to evaluate cross-modal retrieval, fine-grained sequential understanding, and generalization to unseen data, respectively. Empirical results show that VC2L achieves competitive or superior performance compared to CLIP-style models on both the proposed benchmarks and established datasets such as M-BEIR and MTEB. These findings underscore the potential of multimodal web data as a valuable training resource for contrastive learning and illustrate the scalability of a unified, vision-centric approach for multimodal representation learning. Code and models are available at: https://github.com/showlab/VC2L.
                </div>
            </details>
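            <div class="mt-3 text-sm text-gray-700">
                <p class="mb-2">The abstract specifies a snippet-level contrastive objective that aligns consecutive segments of a document, all rendered as images and encoded by one vision transformer. Below is a minimal PyTorch sketch of an InfoNCE-style version of that idea with in-batch negatives; the symmetric-loss choice and all names are assumptions.</p>
                <pre class="p-3 bg-gray-50 rounded-md overflow-x-auto text-xs"><code>import torch
import torch.nn.functional as F

def snippet_contrastive_loss(prev_emb, next_emb, temperature=0.07):
    """Hypothetical InfoNCE-style loss pairing each document snippet with
    the snippet that follows it; other snippets in the batch serve as
    negatives. Both inputs have shape (batch, dim)."""
    prev_emb = F.normalize(prev_emb, dim=-1)
    next_emb = F.normalize(next_emb, dim=-1)
    logits = prev_emb @ next_emb.t() / temperature   # (batch, batch)
    targets = torch.arange(prev_emb.size(0), device=prev_emb.device)
    # Symmetric cross-entropy, as is common in CLIP-style training.
    return 0.5 * (F.cross_entropy(logits, targets)
                  + F.cross_entropy(logits.t(), targets))
</code></pre>
            </div>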
    </div>
</div>
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.18650v1" target="_blank" rel="noopener noreferrer">
                Binary Quadratic Quantization: Beyond First-Order Quantization for Real-Valued Matrix Compression
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>3/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            Binary Quadratic Quantization: Beyond First-Order Quantization for Real-Valued Matrix Compression
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Kyo Kuroki, Yasuyuki Okoshi, Thiem Van Chu, Kazushi Kawamura, Masato Motomura
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>Personalized recommendation rationale:</strong>
            <p class="text-gray-600 text-sm mt-1">This paper focuses on quantization techniques for matrix compression and is a general-purpose model compression method. Although quantization can be applied to deploying large-scale models in recommendation and search, the title does not explicitly point to Transformer architectures, LLM techniques, or specific applications in recommendation/search/ads. Such general compression techniques may indirectly help optimize model efficiency in these areas, but a direct application link is missing.</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-10-21 13:58:46
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2510.18650v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2510.18650v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CV</span><span class="category-tag">cs.AI</span><span class="category-tag">cs.LG</span><span class="category-tag">cs.NE</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     View full abstract <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    This paper proposes a novel matrix quantization method, Binary Quadratic Quantization (BQQ). In contrast to conventional first-order quantization approaches, such as uniform quantization and binary coding quantization, that approximate real-valued matrices via linear combinations of binary bases, BQQ leverages the expressive power of binary quadratic expressions while maintaining an extremely compact data format. We validate our approach with two experiments: a matrix compression benchmark and post-training quantization (PTQ) on pretrained Vision Transformer-based models. Experimental results demonstrate that BQQ consistently achieves a superior trade-off between memory efficiency and reconstruction error than conventional methods for compressing diverse matrix data. It also delivers strong PTQ performance, even though we neither target state-of-the-art PTQ accuracy under tight memory constraints nor rely on PTQ-specific binary matrix optimization. For example, our proposed method outperforms the state-of-the-art PTQ method by up to 2.2% and 59.1% on the ImageNet dataset under the calibration-based and data-free scenarios, respectively, with quantization equivalent to 2 bits. These findings highlight the surprising effectiveness of binary quadratic expressions for efficient matrix approximation and neural network compression.
                </div>
            </details>
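            <div class="mt-3 text-sm text-gray-700">
                <p class="mb-2">The abstract contrasts BQQ with first-order schemes that approximate a real-valued matrix as a linear combination of binary bases. The sketch below implements that conventional baseline with a greedy residual fit; BQQ's own quadratic formulation is not given in the abstract, so it is not reproduced here.</p>
                <pre class="p-3 bg-gray-50 rounded-md overflow-x-auto text-xs"><code>import numpy as np

def binary_coding_quantize(w, num_bases=2):
    """Greedy first-order binary coding quantization: approximate a real
    matrix as a sum of scaled sign matrices, w ~ sum_k alpha_k * B_k with
    B_k in {-1, +1}. This is the conventional baseline the abstract
    contrasts BQQ against; the greedy residual fit is one common recipe."""
    residual = w.astype(np.float64)
    approx = np.zeros_like(residual)
    for _ in range(num_bases):
        b = np.sign(residual)
        b[b == 0] = 1.0
        alpha = np.abs(residual).mean()  # least-squares scale for a sign basis
        approx += alpha * b
        residual -= alpha * b
    return approx
</code></pre>
            </div>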
    </div>
</div>
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.18636v1" target="_blank" rel="noopener noreferrer">
                C-SWAP: Explainability-Aware Structured Pruning for Efficient Neural Networks Compression
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>3/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            C-SWAP: Explainability-Aware Structured Pruning for Efficient Neural Networks Compression
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Baptiste Bauvin, Loïc Baret, Ola Ahmad
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>Personalized recommendation rationale:</strong>
            <p class="text-gray-600 text-sm mt-1">This paper mainly addresses structured pruning for neural network compression. While it belongs to the model-efficiency space, it does not explicitly involve Transformer architectures or LLM techniques. Such pruning could be used to optimize model deployment in recommendation, search, or ads, but the title shows no direct connection to these domains or any concrete application scenario.</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-10-21 13:40:11
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2510.18636v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2510.18636v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CV</span><span class="category-tag">cs.AI</span><span class="category-tag">cs.LG</span><span class="category-tag">cs.RO</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     View full abstract <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Neural network compression has gained increasing attention in recent years, particularly in computer vision applications, where the need for model reduction is crucial for overcoming deployment constraints. Pruning is a widely used technique that promotes sparsity in model structures, e.g. weights, neurons, and layers, reducing size and inference costs. Structured pruning is especially important as it allows for the removal of entire structures, which further accelerates inference time and reduces memory overhead. However, it can be computationally expensive, requiring iterative retraining and optimization. To overcome this problem, recent methods considered a one-shot setting, which applies pruning directly at post-training. Unfortunately, they often lead to a considerable drop in performance. In this paper, we focus on this issue by proposing a novel one-shot pruning framework that relies on explainable deep learning. First, we introduce a causal-aware pruning approach that leverages cause-effect relations between model predictions and structures in a progressive pruning process. It allows us to efficiently reduce the size of the network, ensuring that the removed structures do not deter the performance of the model. Then, through experiments conducted on convolutional neural network and vision transformer baselines, pre-trained on classification tasks, we demonstrate that our method consistently achieves substantial reductions in model size, with minimal impact on performance, and without the need for fine-tuning. Overall, our approach outperforms its counterparts, offering the best trade-off. Our code is available on GitHub.
                </div>
            </details>
    </div>
</div>
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.18583v1" target="_blank" rel="noopener noreferrer">
                CovMatch: Cross-Covariance Guided Multimodal Dataset Distillation with Trainable Text Encoder
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>3/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            CovMatch: Cross-Covariance Guided Multimodal Dataset Distillation with Trainable Text Encoder
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Yongmin Lee, Hye Won Chung
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>Personalized recommendation rationale:</strong>
            <p class="text-gray-600 text-sm mt-1">This paper centers on multimodal dataset distillation. Although it involves training a text encoder, the core contribution is dataset distillation rather than a direct advance in Transformer architectures or LLM techniques. Its potential use in recommendation or search/ads is limited: it might indirectly influence feature engineering through better multimodal data representations, but there is no clear, direct application scenario.</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-10-21 12:36:25
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2510.18583v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2510.18583v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CV</span><span class="category-tag">cs.LG</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     View full abstract <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Multimodal dataset distillation aims to synthesize a small set of image-text pairs that enables efficient training of large-scale vision-language models. While dataset distillation has shown promise in unimodal tasks, extending it to multimodal contrastive learning presents key challenges: learning cross-modal alignment and managing the high computational cost of large encoders. Prior approaches address scalability by freezing the text encoder and updating only the image encoder and text projection layer. However, we find this severely limits semantic alignment and becomes a bottleneck for performance scaling. We propose CovMatch, a scalable dataset distillation framework that aligns the cross-covariance of real and synthetic features while regularizing feature distributions within each modality. Unlike prior approaches, CovMatch enables joint optimization of both encoders, leading to stronger cross-modal alignment and improved performance. Evaluated on Flickr30K and COCO, CovMatch outperforms state-of-the-art multimodal distillation methods and achieves up to 6.8% absolute gains in retrieval accuracy using only 500 synthetic pairs.
                </div>
            </details>
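            <div class="mt-3 text-sm text-gray-700">
                <p class="mb-2">The abstract states that CovMatch aligns the cross-covariance of real and synthetic image-text features. Below is a minimal PyTorch sketch of such an alignment term; the within-modality regularizers mentioned in the abstract are omitted, and all names are assumptions.</p>
                <pre class="p-3 bg-gray-50 rounded-md overflow-x-auto text-xs"><code>import torch

def cross_covariance(img_feats, txt_feats):
    """Cross-covariance between paired image and text features,
    shapes (n, d_img) and (n, d_txt)."""
    img_c = img_feats - img_feats.mean(dim=0, keepdim=True)
    txt_c = txt_feats - txt_feats.mean(dim=0, keepdim=True)
    return img_c.t() @ txt_c / img_feats.size(0)

def covmatch_alignment_loss(real_img, real_txt, syn_img, syn_txt):
    """Hypothetical alignment term: match the cross-covariance computed on
    real pairs with the one computed on synthetic pairs (Frobenius distance)."""
    diff = cross_covariance(real_img, real_txt) - cross_covariance(syn_img, syn_txt)
    return diff.pow(2).sum()
</code></pre>
            </div>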
    </div>
</div>
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.18377v1" target="_blank" rel="noopener noreferrer">
                Cross-Modal Scene Semantic Alignment for Image Complexity Assessment
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>3/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            Cross-Modal Scene Semantic Alignment for Image Complexity Assessment
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Yuqing Luo, Yixiao Li, Jiang Liu, Jun Fu, Hadi Amirpour, Guanghui Yue, Baoquan Z...
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>Personalized recommendation rationale:</strong>
            <p class="text-gray-600 text-sm mt-1">Although this paper involves cross-modal alignment, its core application is image complexity assessment, which is only weakly related to recommendation, search, or ads. Cross-modal alignment itself could in principle be applied to heterogeneous data in recommender systems (e.g., user behavior sequences and contextual features), but the paper's specific application scenario limits its direct usefulness.</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-10-21 07:52:40
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2510.18377v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2510.18377v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     View full abstract <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Image complexity assessment (ICA) is a challenging task in perceptual evaluation due to the subjective nature of human perception and the inherent semantic diversity in real-world images. Existing ICA methods predominantly rely on hand-crafted or shallow convolutional neural network-based features of a single visual modality, which are insufficient to fully capture the perceived representations closely related to image complexity. Recently, cross-modal scene semantic information has been shown to play a crucial role in various computer vision tasks, particularly those involving perceptual understanding. However, the exploration of cross-modal scene semantic information in the context of ICA remains unaddressed. Therefore, in this paper, we propose a novel ICA method called Cross-Modal Scene Semantic Alignment (CM-SSA), which leverages scene semantic alignment from a cross-modal perspective to enhance ICA performance, enabling complexity predictions to be more consistent with subjective human perception. Specifically, the proposed CM-SSA consists of a complexity regression branch and a scene semantic alignment branch. The complexity regression branch estimates image complexity levels under the guidance of the scene semantic alignment branch, while the scene semantic alignment branch is used to align images with corresponding text prompts that convey rich scene semantic information by pair-wise learning. Extensive experiments on several ICA datasets demonstrate that the proposed CM-SSA significantly outperforms state-of-the-art approaches. Codes are available at https://github.com/XQ2K/First-Cross-Model-ICA.
                </div>
            </details>
    </div>
</div>
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.18218v1" target="_blank" rel="noopener noreferrer">
                DualHash: A Stochastic Primal-Dual Algorithm with Theoretical Guarantee for Deep Hashing
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>3/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            DualHash: A Stochastic Primal-Dual Algorithm with Theoretical Guarantee for Deep Hashing
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Luxuan Li, Xiao Wang, Chunfeng Cui
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>Personalized recommendation rationale:</strong>
            <p class="text-gray-600 text-sm mt-1">This paper proposes an optimization algorithm for deep hashing; hashing is commonly used in recommendation and search systems for approximate nearest-neighbor search and vector retrieval. However, the work focuses on theoretical guarantees and optimization methodology, so it is an underlying technique rather than core domain progress. Potential applications include efficiency gains for large-scale vector retrieval systems.</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-10-21 01:52:46
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2510.18218v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2510.18218v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">math.OC</span><span class="category-tag">cs.CV</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     View full abstract <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Deep hashing converts high-dimensional feature vectors into compact binary codes, enabling efficient large-scale retrieval. A fundamental challenge in deep hashing stems from the discrete nature of quantization in generating the codes. W-type regularizations, such as $||z|-1|$, have been proven effective as they encourage variables toward binary values. However, existing methods often directly optimize these regularizations without convergence guarantees. While proximal gradient methods offer a promising solution, the coupling between W-type regularizers and neural network outputs results in composite forms that generally lack closed-form proximal solutions. In this paper, we present a stochastic primal-dual hashing algorithm, referred to as DualHash, that provides rigorous complexity bounds. Using Fenchel duality, we partially transform the nonconvex W-type regularization optimization into the dual space, which results in a proximal operator that admits closed-form solutions. We derive two algorithm instances: a momentum-accelerated version with $\mathcal{O}(\varepsilon^{-4})$ complexity and an improved $\mathcal{O}(\varepsilon^{-3})$ version using variance reduction. Experiments on three image retrieval databases demonstrate the superior performance of DualHash.
                </div>
            </details>
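            <div class="mt-3 text-sm text-gray-700">
                <p class="mb-2">The W-type regularizer ||z| - 1| quoted in the abstract is simple to inspect numerically: it vanishes exactly at z = -1 and z = +1, which is why it pushes relaxed hash codes toward binary values. A tiny NumPy illustration:</p>
                <pre class="p-3 bg-gray-50 rounded-md overflow-x-auto text-xs"><code>import numpy as np

def w_type_penalty(z):
    """W-type regularizer ||z| - 1| from the abstract: zero exactly at
    z = -1 and z = +1, so it drives relaxed hash codes toward binary values."""
    return np.abs(np.abs(z) - 1.0)

# The penalty traces a 'W' shape over the real line:
z = np.array([-2.0, -1.0, 0.0, 1.0, 2.0])
print(w_type_penalty(z))   # [1. 0. 1. 0. 1.]
</code></pre>
            </div>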
    </div>
</div>
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.18433v1" target="_blank" rel="noopener noreferrer">
                ImageGem: In-the-wild Generative Image Interaction Dataset for Generative Model Personalization
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>2/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            ImageGem: In-the-wild Generative Image Interaction Dataset for Generative Model Personalization
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Yuanhe Guo, Linxi Xie, Zhuoran Chen, Kangrui Yu, Ryan Po, Guandao Yang, Gordon W...
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>Personalized recommendation rationale:</strong>
            <p class="text-gray-600 text-sm mt-1">This paper is mainly about building a personalization dataset for generative models and sits in the AIGC/content-generation space. Personalization matters in recommender systems, but the paper concentrates on image generation rather than the core ranking or retrieval tasks of recommendation/search/ads, so it is only weakly related to the current focus.</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-10-21 09:08:01
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2510.18433v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2510.18433v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CV</span><span class="category-tag">cs.AI</span><span class="category-tag">cs.IR</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     View full abstract <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    We introduce ImageGem, a dataset for studying generative models that understand fine-grained individual preferences. We posit that a key challenge hindering the development of such a generative model is the lack of in-the-wild and fine-grained user preference annotations. Our dataset features real-world interaction data from 57K users, who collectively have built 242K customized LoRAs, written 3M text prompts, and created 5M generated images. With user preference annotations from our dataset, we were able to train better preference alignment models. In addition, leveraging individual user preference, we investigated the performance of retrieval models and a vision-language model on personalized image retrieval and generative model recommendation. Finally, we propose an end-to-end framework for editing customized diffusion models in a latent weight space to align with individual user preferences. Our results demonstrate that the ImageGem dataset enables, for the first time, a new paradigm for generative model personalization.
                </div>
            </details>
    </div>
</div>
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.18874v1" target="_blank" rel="noopener noreferrer">
                Retaining by Doing: The Role of On-Policy Data in Mitigating Forgetting
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>2/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            Retaining by Doing: The Role of On-Policy Data in Mitigating Forgetting
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Howard Chen, Noam Razin, Karthik Narasimhan, Danqi Chen
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>Personalized recommendation rationale:</strong>
            <p class="text-gray-600 text-sm mt-1">This paper studies the role of on-policy data in mitigating forgetting, a continual-learning problem within reinforcement learning. RL does see use in recommender systems, but the title does not point to a concrete application in recommendation, search, or ads, nor does it mention the core technologies of interest such as LLMs, Transformer architectures, or multimodal modeling.</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-10-21 17:59:41
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2510.18874v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2510.18874v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.LG</span><span class="category-tag">cs.CL</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     View full abstract <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Adapting language models (LMs) to new tasks via post-training carries the risk of degrading existing capabilities -- a phenomenon classically known as catastrophic forgetting. In this paper, toward identifying guidelines for mitigating this phenomenon, we systematically compare the forgetting patterns of two widely adopted post-training methods: supervised fine-tuning (SFT) and reinforcement learning (RL). Our experiments reveal a consistent trend across LM families (Llama, Qwen) and tasks (instruction following, general knowledge, and arithmetic reasoning): RL leads to less forgetting than SFT while achieving comparable or higher target task performance. To investigate the cause for this difference, we consider a simplified setting in which the LM is modeled as a mixture of two distributions, one corresponding to prior knowledge and the other to the target task. We identify that the mode-seeking nature of RL, which stems from its use of on-policy data, enables keeping prior knowledge intact when learning the target task. We then verify this insight by demonstrating that the use of on-policy data underlies the robustness of RL to forgetting in practical settings, as opposed to other algorithmic choices such as the KL regularization or advantage estimation. Lastly, as a practical implication, our results highlight the potential of mitigating forgetting using approximately on-policy data, which can be substantially more efficient to obtain than fully on-policy data.
                </div>
            </details>
    </div>
</div>
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.18855v1" target="_blank" rel="noopener noreferrer">
                Every Step Evolves: Scaling Reinforcement Learning for Trillion-Scale Thinking Model
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>2/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            Every Step Evolves: Scaling Reinforcement Learning for Trillion-Scale Thinking Model
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Ling Team, Anqi Shen, Baihui Li, Bin Hu, Bin Jing, Cai Chen, Chao Huang, Chao Zh...
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>Personalized recommendation rationale:</strong>
            <p class="text-gray-600 text-sm mt-1">Although the paper deals with scaling reinforcement learning, the title clearly targets a 'thinking model' rather than recommendation, search, or ads applications. RL does have uses in recommender systems, but this paper shows no explicit link to those domains, and a 'trillion-scale thinking model' sounds like general-purpose AI rather than a domain-specific application.</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-10-21 17:46:14
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2510.18855v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2510.18855v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CL</span><span class="category-tag">cs.AI</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     View full abstract <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    We present Ring-1T, the first open-source, state-of-the-art thinking model with a trillion-scale parameter. It features 1 trillion total parameters and activates approximately 50 billion per token. Training such models at a trillion-parameter scale introduces unprecedented challenges, including train-inference misalignment, inefficiencies in rollout processing, and bottlenecks in the RL system. To address these, we pioneer three interconnected innovations: (1) IcePop stabilizes RL training via token-level discrepancy masking and clipping, resolving instability from training-inference mismatches; (2) C3PO++ improves resource utilization for long rollouts under a token budget by dynamically partitioning them, thereby obtaining high time efficiency; and (3) ASystem, a high-performance RL framework designed to overcome the systemic bottlenecks that impede trillion-parameter model training. Ring-1T delivers breakthrough results across critical benchmarks: 93.4 on AIME-2025, 86.72 on HMMT-2025, 2088 on CodeForces, and 55.94 on ARC-AGI-v1. Notably, it attains a silver medal-level result on the IMO-2025, underscoring its exceptional reasoning capabilities. By releasing the complete 1T parameter MoE model to the community, we provide the research community with direct access to cutting-edge reasoning capabilities. This contribution marks a significant milestone in democratizing large-scale reasoning intelligence and establishes a new baseline for open-source model performance.
                </div>
            </details>
    </div>
</div>
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.18840v1" target="_blank" rel="noopener noreferrer">
                See the Text: From Tokenization to Visual Reading
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>2/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            See the Text: From Tokenization to Visual Reading
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Ling Xing, Alex Jinpeng Wang, Rui Yan, Hongyu Qu, Zechao Li, Jinhui Tang
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>Personalized recommendation rationale:</strong>
            <p class="text-gray-600 text-sm mt-1">The title suggests a shift from text processing (tokenization) to visual understanding and likely involves multimodal learning. The vision-language-model analogy has potential uses for heterogeneous data, but the title itself is too broad and does not point to a specific application in recommendation, search, or ads. Its relevance depends on whether the paper applies the visual-reading idea to unified modeling of heterogeneous modalities such as user behavior sequences or contextual features.</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-10-21 17:34:48
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2510.18840v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2510.18840v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CV</span><span class="category-tag">cs.CL</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     View full abstract <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    People see text. Humans read by recognizing words as visual objects, including their shapes, layouts, and patterns, before connecting them to meaning, which enables us to handle typos, distorted fonts, and various scripts effectively. Modern large language models (LLMs), however, rely on subword tokenization, fragmenting text into pieces from a fixed vocabulary. While effective for high-resource languages, this approach over-segments low-resource languages, yielding long, linguistically meaningless sequences and inflating computation. In this work, we challenge this entrenched paradigm and move toward a vision-centric alternative. Our method, SeeTok, renders text as images (visual-text) and leverages pretrained multimodal LLMs to interpret them, reusing strong OCR and text-vision alignment abilities learned from large-scale multimodal training. Across three different language tasks, SeeTok matches or surpasses subword tokenizers while requiring 4.43 times fewer tokens and reducing FLOPs by 70.5%, with additional gains in cross-lingual generalization, robustness to typographic noise, and linguistic hierarchy. SeeTok signals a shift from symbolic tokenization to human-like visual reading, and takes a step toward more natural and cognitively inspired language models.
                </div>
            </details>
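            <div class="mt-3 text-sm text-gray-700">
                <p class="mb-2">SeeTok's core move, per the abstract, is to rasterize text into images (visual-text) and let a pretrained multimodal LLM read them instead of consuming subword token ids. Below is a minimal Pillow sketch of the rendering step only; the canvas size, font, and layout are arbitrary choices, not the paper's.</p>
                <pre class="p-3 bg-gray-50 rounded-md overflow-x-auto text-xs"><code>from PIL import Image, ImageDraw, ImageFont

def render_text_as_image(text, width=512, height=64):
    """Minimal illustration of the visual-text idea: rasterize a string into
    an RGB image that a multimodal model could consume, instead of feeding
    subword token ids."""
    img = Image.new("RGB", (width, height), "white")
    draw = ImageDraw.Draw(img)
    draw.text((4, 4), text, fill="black", font=ImageFont.load_default())
    return img

patch = render_text_as_image("Visual reading handles typos and varied scripts.")
patch.save("visual_text.png")
</code></pre>
            </div>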
    </div>
</div>
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.18817v1" target="_blank" rel="noopener noreferrer">
                Fine-Tuned Thoughts: Leveraging Chain-of-Thought Reasoning for Industrial Asset Health Monitoring
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>2/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            Fine-Tuned Thoughts: Leveraging Chain-of-Thought Reasoning for Industrial Asset Health Monitoring
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Shuxin Lin, Dhaval Patel, Christodoulos Constantinides
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>Personalized recommendation rationale:</strong>
            <p class="text-gray-600 text-sm mt-1">This paper targets the niche application of industrial asset health monitoring, which belongs to industrial IoT and predictive maintenance and has no direct connection to the core areas of recommendation, search, or ads. It does mention chain-of-thought reasoning, an LLM technique, but the application scenario is too specific and too far from RecSys/Search/Ads, so relevance is very low.</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-10-21 17:18:24
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2510.18817v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2510.18817v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CL</span><span class="category-tag">cs.AI</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     View full abstract <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Small Language Models (SLMs) are becoming increasingly popular in specialized fields, such as industrial applications, due to their efficiency, lower computational requirements, and ability to be fine-tuned for domain-specific tasks, enabling accurate and cost-effective solutions. However, performing complex reasoning using SLMs in specialized fields such as Industry 4.0 remains challenging. In this paper, we propose a knowledge distillation framework for industrial asset health, which transfers reasoning capabilities via Chain-of-Thought (CoT) distillation from Large Language Models (LLMs) to smaller, more efficient models (SLMs). We discuss the advantages and the process of distilling LLMs using multi-choice question answering (MCQA) prompts to enhance reasoning and refine decision-making. We also perform in-context learning to verify the quality of the generated knowledge and benchmark the performance of fine-tuned SLMs with generated knowledge against widely used LLMs. The results show that the fine-tuned SLMs with CoT reasoning outperform the base models by a significant margin, narrowing the gap to their LLM counterparts. Our code is open-sourced at: https://github.com/IBM/FailureSensorIQ.
                </div>
            </details>
    </div>
</div>
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.18731v1" target="_blank" rel="noopener noreferrer">
                Verifiable Accuracy and Abstention Rewards in Curriculum RL to Alleviate Lost-in-Conversation
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>2/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            Verifiable Accuracy and Abstention Rewards in Curriculum RL to Alleviate Lost-in-Conversation
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Ming Li
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>Personalized recommendation rationale:</strong>
            <p class="text-gray-600 text-sm mt-1">Although the paper uses reinforcement learning, it is explicitly focused on dialogue systems (the Lost-in-Conversation problem), a purely conversational-AI application rather than recommendation, search, or ads. Its technical direction (curriculum RL, accuracy and abstention rewards) has no clear RecSys/Search/Ads application scenario; it mainly addresses dialogue coherence rather than ranking or personalized recommendation.</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-10-21 15:32:26
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2510.18731v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2510.18731v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CL</span><span class="category-tag">cs.AI</span><span class="category-tag">cs.LG</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     View full abstract <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Large Language Models demonstrate strong capabilities in single-turn instruction following but suffer from Lost-in-Conversation (LiC), a degradation in performance as information is revealed progressively in multi-turn settings. Motivated by the current progress on Reinforcement Learning with Verifiable Rewards (RLVR), we propose Curriculum Reinforcement Learning with Verifiable Accuracy and Abstention Rewards (RLAAR), a framework that encourages models not only to generate correct answers, but also to judge the solvability of questions in the multi-turn conversation setting. Our approach employs a competence-gated curriculum that incrementally increases dialogue difficulty (in terms of instruction shards), stabilizing training while promoting reliability. Using multi-turn, on-policy rollouts and a mixed-reward system, RLAAR teaches models to balance problem-solving with informed abstention, reducing premature answering behaviors that cause LiC. Evaluated on LiC benchmarks, RLAAR significantly mitigates LiC performance decay (62.6% to 75.1%) and improves calibrated abstention rates (33.5% to 73.4%). Together, these results provide a practical recipe for building multi-turn reliable and trustworthy LLMs.
                </div>
            </details>
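            <div class="mt-3 text-sm text-gray-700">
                <p class="mb-2">The abstract describes a mixed-reward system that rewards verifiable accuracy while also crediting informed abstention on questions that are not yet solvable from the revealed instruction shards. Below is a hypothetical reward shape consistent with that description; the specific values and gating logic are assumptions, not taken from the paper.</p>
                <pre class="p-3 bg-gray-50 rounded-md overflow-x-auto text-xs"><code>def mixed_reward(answered, correct, solvable, abstain_bonus=0.5):
    """Hypothetical mixed reward balancing problem-solving with informed
    abstention: reward correct answers, give partial credit for abstaining
    when the question is not yet solvable, and penalize premature or wrong
    answers."""
    if answered:
        return 1.0 if correct else -1.0
    # Model abstained: good if the shards revealed so far are insufficient,
    # bad if it withheld an answer it could already have given.
    return abstain_bonus if not solvable else -0.5
</code></pre>
            </div>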
    </div>
</div>
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.18691v1" target="_blank" rel="noopener noreferrer">
                Investigating LLM Capabilities on Long Context Comprehension for Medical Question Answering
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>2/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            Investigating LLM Capabilities on Long Context Comprehension for Medical Question Answering
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Feras AlMannaa, Talia Tseriotou, Jenny Chim, Maria Liakata
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>Personalized recommendation rationale:</strong>
            <p class="text-gray-600 text-sm mt-1">This paper focuses on question answering in the medical domain, which falls squarely into the out-of-scope topics. It does involve long-context comprehension with LLMs, but a medical-specific application has no direct connection to recommendation, search, or ads. Long-context techniques may have potential value in themselves, yet the medical setting keeps the overall relevance very low.</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-10-21 14:50:24
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2510.18691v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2510.18691v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CL</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     View full abstract <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    This study is the first to investigate LLM comprehension capabilities over long-context (LC) medical QA of clinical relevance. Our comprehensive assessment spans a range of content-inclusion settings based on their relevance, LLM models of varying capabilities and datasets across task formulations, revealing insights on model size effects, limitations, underlying memorization issues and the benefits of reasoning models. Importantly, we examine the effect of RAG on medical LC comprehension, uncover best settings in single versus multi-document reasoning datasets and showcase RAG strategies for improvements over LC. We shed light into some of the evaluation aspects using a multi-faceted approach. Our qualitative and error analyses address open questions on when RAG is beneficial over LC, revealing common failure cases.
                </div>
            </details>
    </div>
</div>
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.18561v1" target="_blank" rel="noopener noreferrer">
                Large language models for folktale type automation based on motifs: Cinderella case study
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>2/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            Large language models for folktale type automation based on motifs: Cinderella case study
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Tjaša Arčon, Marko Robnik-Šikonja, Polona Tratnik
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>Personalized recommendation rationale:</strong>
            <p class="text-gray-600 text-sm mt-1">This paper is about folktale analysis and automated type classification, a domain-specific content application. Although it uses LLM techniques, the application scenario has no direct connection to recommendation, search, or ads; it is a purely NLP-centric application.</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-10-21 12:18:20
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2510.18561v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2510.18561v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CL</span><span class="category-tag">cs.AI</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     View full abstract <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Artificial intelligence approaches are being adapted to many research areas, including digital humanities. We built a methodology for large-scale analyses in folkloristics. Using machine learning and natural language processing, we automatically detected motifs in a large collection of Cinderella variants and analysed their similarities and differences with clustering and dimensionality reduction. The results show that large language models detect complex interactions in tales, enabling computational analysis of extensive text collections and facilitating cross-lingual comparisons.
                </div>
            </details>
    </div>
</div>
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.18510v1" target="_blank" rel="noopener noreferrer">
                Identity-Aware Large Language Models require Cultural Reasoning
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>2/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            Identity-Aware Large Language Models require Cultural Reasoning
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Alistair Plum, Anne-Marie Lutgen, Christoph Purschke, Achim Rettinger
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>Personalized recommendation rationale:</strong>
            <p class="text-gray-600 text-sm mt-1">This paper focuses on cultural reasoning and identity awareness in LLMs, which falls under the 'Hallucination, Evaluation benchmarks, or other purely NLP-centric topics' category that was explicitly excluded. While cultural understanding could theoretically benefit personalized recommendations, the paper's primary focus appears to be on general LLM capabilities rather than direct applications to RecSys/Search/Ads systems.</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-10-21 10:50:51
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2510.18510v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2510.18510v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CL</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Large language models have become the latest trend in natural language processing, heavily featuring in the digital tools we use every day. However, their replies often reflect a narrow cultural viewpoint that overlooks the diversity of global users. This missing capability could be referred to as cultural reasoning, which we define here as the capacity of a model to recognise culture-specific knowledge, values, and social norms, and to adjust its output so that it aligns with the expectations of individual users. Because culture shapes interpretation, emotional resonance, and acceptable behaviour, cultural reasoning is essential for identity-aware AI. When this capacity is limited or absent, models can sustain stereotypes, ignore minority perspectives, erode trust, and perpetuate hate. Recent empirical studies strongly suggest that current models default to Western norms when judging moral dilemmas, interpreting idioms, or offering advice, and that fine-tuning on survey data only partly reduces this tendency. The present evaluation methods mainly report static accuracy scores and thus fail to capture adaptive reasoning in context. Although broader datasets can help, they cannot alone ensure genuine cultural competence. Therefore, we argue that cultural reasoning must be treated as a foundational capability alongside factual accuracy and linguistic coherence. By clarifying the concept and outlining initial directions for its assessment, a foundation is laid for future systems to be able to respond with greater sensitivity to the complex fabric of human culture.
                </div>
            </details>
    </div>
</div>
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.18502v1" target="_blank" rel="noopener noreferrer">
                基于文本检索增强生成的零样本车辆型号识别
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>2/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            Zero-Shot Vehicle Model Recognition via Text-Based Retrieval-Augmented Generation
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Wei-Chia Chang, Yan-Ann Chen
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">虽然论文涉及检索增强生成(RAG)技术，但其应用场景是车辆型号识别，这属于计算机视觉领域而非推荐系统、搜索或广告的核心范畴。该技术可能间接启发多模态检索系统，但与当前关注的LLM在RecSys/Search/Ads中的直接应用或核心架构进展关联较弱。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-10-21 10:39:39
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2510.18502v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2510.18502v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CV</span><span class="category-tag">cs.AI</span><span class="category-tag">cs.CL</span><span class="category-tag">cs.LG</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Vehicle make and model recognition (VMMR) is an important task in intelligent transportation systems, but existing approaches struggle to adapt to newly released models. Contrastive Language-Image Pretraining (CLIP) provides strong visual-text alignment, yet its fixed pretrained weights limit performance without costly image-specific finetuning. We propose a pipeline that integrates vision language models (VLMs) with Retrieval-Augmented Generation (RAG) to support zero-shot recognition through text-based reasoning. A VLM converts vehicle images into descriptive attributes, which are compared against a database of textual features. Relevant entries are retrieved and combined with the description to form a prompt, and a language model (LM) infers the make and model. This design avoids large-scale retraining and enables rapid updates by adding textual descriptions of new vehicles. Experiments show that the proposed method improves recognition by nearly 20% over the CLIP baseline, demonstrating the potential of RAG-enhanced LM reasoning for scalable VMMR in smart-city applications.
                </div>
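                 <p class="mt-2 text-xs text-gray-500">Editor's note: a minimal sketch of the text-based retrieval flow described above, not the authors' implementation. describe_image, the toy embed function, and the two-entry vehicle_db are hypothetical stand-ins for the VLM, the text encoder, and the textual feature database; the final prompt would be sent to a language model.</p>
                 <pre class="mt-1 p-3 bg-gray-100 rounded-md text-xs text-gray-700 overflow-x-auto"><code>
# Illustrative sketch only: image description, text retrieval, prompt assembly.
import numpy as np

def embed(text):
    # Toy deterministic embedding standing in for a real text encoder.
    rng = np.random.default_rng(sum(map(ord, text)))
    v = rng.normal(size=64)
    return v / np.linalg.norm(v)

vehicle_db = {
    "Toyota Corolla 2022": "compact sedan, slim LED headlights, trapezoid grille",
    "Ford F-150 2021": "full-size pickup, large chrome grille, high ground clearance",
}
db_vectors = {name: embed(desc) for name, desc in vehicle_db.items()}

def describe_image(image_path):
    # Placeholder for a VLM turning the photo into descriptive attributes.
    return "compact sedan with slim headlights and a trapezoid grille"

def build_prompt(image_path, top_k=1):
    description = describe_image(image_path)
    query = embed(description)
    ranked = sorted(db_vectors, key=lambda n: float(query @ db_vectors[n]), reverse=True)
    context = "\n".join(f"{n}: {vehicle_db[n]}" for n in ranked[:top_k])
    return f"Description: {description}\nCandidates:\n{context}\nMake and model?"

print(build_prompt("car.jpg"))  # in the real pipeline this prompt goes to an LM
</code></pre>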
            </details>
    </div>
</div>
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.18471v1" target="_blank" rel="noopener noreferrer">
                CodeRL+：通过强化学习与执行语义对齐改进代码生成
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>2/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            CodeRL+: Improving Code Generation via Reinforcement with Execution Semantics Alignment
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Xue Jiang, Yihong Dong, Mengyang Liu, Hongyi Deng, Tian Wang, Yongding Tao, Rong...
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于代码生成领域的强化学习技术改进，属于纯粹的NLP应用场景。虽然涉及强化学习，但与推荐系统、搜索或广告的核心技术需求没有直接关联，执行语义对齐技术也难以迁移到这些领域的用户行为序列建模中。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-10-21 09:48:06
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2510.18471v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2510.18471v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.SE</span><span class="category-tag">cs.AI</span><span class="category-tag">cs.CL</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    While Large Language Models (LLMs) excel at code generation by learning from vast code corpora, a fundamental semantic gap remains between their training on textual patterns and the goal of functional correctness, which is governed by formal execution semantics. Reinforcement Learning with Verifiable Rewards (RLVR) approaches attempt to bridge this gap using outcome rewards from executing test cases. However, solely relying on binary pass/fail signals is inefficient for establishing a well-aligned connection between the textual representation of code and its execution semantics, especially for subtle logical errors within the code. In this paper, we propose CodeRL+, a novel approach that integrates execution semantics alignment into the RLVR training pipeline for code generation. CodeRL+ enables the model to infer variable-level execution trajectory, providing a direct learning signal of execution semantics. CodeRL+ can construct execution semantics alignment directly using existing on-policy rollouts and integrates seamlessly with various RL algorithms. Extensive experiments demonstrate that CodeRL+ outperforms post-training baselines (including RLVR and Distillation), achieving a 4.6% average relative improvement in pass@1. CodeRL+ generalizes effectively to other coding tasks, yielding 15.5% and 4.4% higher accuracy on code-reasoning and test-output-generation benchmarks, respectively. CodeRL+ shows strong applicability across diverse RL algorithms and LLMs. Furthermore, probe analyses provide compelling evidence that CodeRL+ strengthens the alignment between code's textual representations and its underlying execution semantics.
                </div>
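                 <p class="mt-2 text-xs text-gray-500">Editor's note: a hedged sketch of how an execution-semantics signal could be combined with an outcome reward, not the paper's training code. The toy traces, the alignment_weight parameter, and the omitted RL machinery are all editorial assumptions.</p>
                 <pre class="mt-1 p-3 bg-gray-100 rounded-md text-xs text-gray-700 overflow-x-auto"><code>
# Illustrative sketch only: pass/fail reward plus a variable-trace agreement bonus.
def run_tests(fn, tests):
    passed = sum(1 for args, expected in tests if fn(*args) == expected)
    return passed / len(tests)

def trace_agreement(predicted_trace, actual_trace):
    # Fraction of (variable, value) steps the model predicted correctly.
    hits = sum(1 for p, a in zip(predicted_trace, actual_trace) if p == a)
    return hits / max(len(actual_trace), 1)

def reward(fn, tests, predicted_trace, actual_trace, alignment_weight=0.5):
    return run_tests(fn, tests) + alignment_weight * trace_agreement(predicted_trace, actual_trace)

# Toy example: a candidate completion, its unit tests, and its execution trace.
def candidate(x):
    y = x * 2
    return y + 1

tests = [((3,), 7), ((0,), 1)]
actual = [("y", 6), ("return", 7)]
predicted = [("y", 6), ("return", 8)]
print(reward(candidate, tests, predicted, actual))
</code></pre>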
            </details>
    </div>
</div>
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.18455v1" target="_blank" rel="noopener noreferrer">
                ChronoPlay：游戏RAG基准中双动态性与真实性建模框架
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>2/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            ChronoPlay: A Framework for Modeling Dual Dynamics and Authenticity in Game RAG Benchmarks
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Liyang He, Yuren Zhang, Ziwei Zhu, Zhenghui Li, Shiwei Tong
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于游戏领域的RAG基准测试，主要涉及游戏动态性和真实性建模，与我的核心关注点（推荐系统、搜索、广告）相关性较弱。虽然RAG技术本身在搜索中有应用潜力，但论文的特定游戏领域焦点限制了其在RecSys/Search/Ads中的直接适用性。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-10-21 09:28:13
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2510.18455v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2510.18455v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CL</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Retrieval Augmented Generation (RAG) systems are increasingly vital in dynamic domains like online gaming, yet the lack of a dedicated benchmark has impeded standardized evaluation in this area. The core difficulty lies in Dual Dynamics: the constant interplay between game content updates and the shifting focus of the player community. Furthermore, the necessity of automating such a benchmark introduces a critical requirement for player-centric authenticity to ensure generated questions are realistic. To address this integrated challenge, we introduce ChronoPlay, a novel framework for the automated and continuous generation of game RAG benchmarks. ChronoPlay utilizes a dual-dynamic update mechanism to track both forms of change, and a dual-source synthesis engine that draws from official sources and the player community to ensure both factual correctness and authentic query patterns. We instantiate our framework on three distinct games to create the first dynamic RAG benchmark for the gaming domain, offering new insights into model performance under these complex and realistic conditions. Code is available at: https://github.com/hly1998/ChronoPlay.
                </div>
            </details>
    </div>
</div>
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.18454v1" target="_blank" rel="noopener noreferrer">
                参与度削弱安全性：刻板印象与毒性如何塑造语言模型中的幽默
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>2/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            Engagement Undermines Safety: How Stereotypes and Toxicity Shape Humor in Language Models
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Atharvan Dogra, Soumya Suvra Ghosal, Ameet Deshpande, Ashwin Kalyan, Dinesh Mano...
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要研究语言模型中的刻板印象、毒性和安全性问题，这些属于伦理和公平性范畴，已被明确列为无关主题。虽然涉及语言模型，但核心关注点（安全性、毒性）与推荐系统、搜索或广告的技术进展没有直接关联，且未展示在相关领域的潜在应用价值。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-10-21 09:28:09
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2510.18454v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2510.18454v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CL</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Large language models are increasingly used for creative writing and engagement content, raising safety concerns about the outputs. Therefore, casting humor generation as a testbed, this work evaluates how funniness optimization in modern LLM pipelines couples with harmful content by jointly measuring humor, stereotypicality, and toxicity. This is further supplemented by analyzing incongruity signals through information-theoretic metrics. Across six models, we observe that harmful outputs receive higher humor scores, which further increase under role-based prompting, indicating a bias amplification loop between generators and evaluators. Information-theoretic analyses show harmful cues widen predictive uncertainty and, surprisingly, can even make harmful punchlines more expected for some models, suggesting structural embedding in learned humor distributions. External validation on an additional satire-generation task with human-perceived funniness judgments shows that LLM satire increases stereotypicality and, typically, toxicity, including for closed models. Quantitatively, stereotypical/toxic jokes gain 10-21% in mean humor score, stereotypical jokes appear 11% to 28% more often among the jokes marked funny by the LLM-based metric, and up to 10% more often in generations perceived as funny by humans.
                </div>
            </details>
    </div>
</div>
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.18368v1" target="_blank" rel="noopener noreferrer">
                KoSimpleQA：一个带有推理大语言模型分析的韩语事实性基准
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>2/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            KoSimpleQA: A Korean Factuality Benchmark with an Analysis of Reasoning LLMs
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Donghyeon Ko, Yeguk Jin, Kyubyung Chae, Byungwook Lee, Chansong Jo, Sookyo In, J...
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要关注韩语事实性基准测试和LLM推理分析，属于评估基准和纯NLP中心主题，与推荐系统、搜索或广告的核心技术进展无关。虽然涉及LLM，但焦点是语言特定的基准测试而非能够应用于RecSys/Search/Ads的使能技术。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-10-21 07:37:51
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2510.18368v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2510.18368v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CL</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    We present Korean SimpleQA (KoSimpleQA), a benchmark for evaluating factuality in large language models (LLMs) with a focus on Korean cultural knowledge. KoSimpleQA is designed to be challenging yet easy to grade, consisting of 1,000 short, fact-seeking questions with unambiguous answers. We conduct a comprehensive evaluation across a diverse set of open-source LLMs of varying sizes that support Korean, and find that even the strongest model generates the correct answer only 33.7% of the time, underscoring the challenging nature of KoSimpleQA. Notably, performance rankings on KoSimpleQA differ substantially from those on the English SimpleQA, highlighting the unique value of our dataset. Furthermore, our analysis of reasoning LLMs shows that engaging reasoning capabilities in the factual QA task can both help models better elicit their latent knowledge and improve their ability to abstain when uncertain. KoSimpleQA can be found at https://anonymous.4open.science/r/KoSimpleQA-62EB.
                </div>
            </details>
    </div>
</div>
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.18344v1" target="_blank" rel="noopener noreferrer">
                结合远程监督模型与上下文学习进行单语和跨语言关系抽取
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>2/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            Combining Distantly Supervised Models with In Context Learning for Monolingual and Cross-Lingual Relation Extraction
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Vipul Rathore, Malik Hammad Faisal, Parag Singla, Mausam
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要关注关系抽取这一NLP特定任务，属于信息抽取领域，与推荐系统、搜索或广告的核心技术关联度较低。虽然上下文学习是LLM技术的一部分，但论文的应用场景局限于关系抽取，缺乏明确的RecSys/Search/Ads应用潜力。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-10-21 06:55:19
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2510.18344v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2510.18344v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CL</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Distantly Supervised Relation Extraction (DSRE) remains a long-standing challenge in NLP, where models must learn from noisy bag-level annotations while making sentence-level predictions. While existing state-of-the-art (SoTA) DSRE models rely on task-specific training, their integration with in-context learning (ICL) using large language models (LLMs) remains underexplored. A key challenge is that the LLM may not learn relation semantics correctly, due to noisy annotation. In response, we propose HYDRE, a HYbrid Distantly Supervised Relation Extraction framework. It first uses a trained DSRE model to identify the top-k candidate relations for a given test sentence, then uses a novel dynamic exemplar retrieval strategy that extracts reliable, sentence-level exemplars from training data, which are then provided in the LLM prompt to produce the final relation(s). We further extend HYDRE to cross-lingual settings for RE in low-resource languages. Using available English DSRE training data, we evaluate all methods on English as well as a newly curated benchmark covering four diverse low-resource Indic languages: Oriya, Santali, Manipuri, and Tulu. HYDRE achieves up to 20 F1 point gains in English and, on average, 17 F1 points on Indic languages over prior SoTA DSRE models. Detailed ablations exhibit HYDRE's efficacy compared to other prompting strategies.
                </div>
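                 <p class="mt-2 text-xs text-gray-500">Editor's note: a toy sketch of the hybrid flow outlined in the abstract, not the released code. dsre_top_k and exemplar_bank are stand-ins for the trained DSRE model and the mined sentence-level exemplars.</p>
                 <pre class="mt-1 p-3 bg-gray-100 rounded-md text-xs text-gray-700 overflow-x-auto"><code>
# Illustrative sketch only: top-k candidate relations, exemplar retrieval, ICL prompt.
def dsre_top_k(sentence, k=2):
    # Placeholder for the trained bag-level DSRE model.
    scores = {"founded_by": 0.7, "headquartered_in": 0.2, "spouse_of": 0.1}
    return sorted(scores, key=scores.get, reverse=True)[:k]

exemplar_bank = {
    "founded_by": ["[Apple] was founded by [Steve Jobs]."],
    "headquartered_in": ["[Apple] is headquartered in [Cupertino]."],
    "spouse_of": ["[Barack Obama] is married to [Michelle Obama]."],
}

def build_prompt(sentence, k=2):
    lines = ["Classify the relation expressed between the marked entities."]
    for rel in dsre_top_k(sentence, k):
        for ex in exemplar_bank[rel]:
            lines.append(f"Example ({rel}): {ex}")
    lines.append(f"Sentence: {sentence}")
    lines.append("Relation:")
    return "\n".join(lines)

print(build_prompt("[Jorge Paulo Lemann] co-founded [3G Capital] in 2004."))
</code></pre>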
            </details>
    </div>
</div>
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.18297v1" target="_blank" rel="noopener noreferrer">
                从检索到生成：统一外部知识与参数化知识用于医疗问答
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>2/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            From Retrieval to Generation: Unifying External and Parametric Knowledge for Medical Question Answering
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Lei Li, Xiao Zhou, Yingying Zhang, Xian Wu
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文虽然涉及检索与生成的统一方法，但其应用领域是医疗问答，这属于明确的无关主题。尽管统一外部知识和参数化知识的技术可能具有通用性，但论文没有展示与推荐系统、搜索或广告的明确相关性，因此评分较低。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-10-21 04:58:29
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2510.18297v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2510.18297v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CL</span><span class="category-tag">cs.AI</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Medical question answering (QA) requires extensive access to domain-specific knowledge. A promising direction is to enhance large language models (LLMs) with external knowledge retrieved from medical corpora or parametric knowledge stored in model parameters. Existing approaches typically fall into two categories: Retrieval-Augmented Generation (RAG), which grounds model reasoning on externally retrieved evidence, and Generation-Augmented Generation (GAG), which depends solely on the model's internal knowledge to generate contextual documents. However, RAG often suffers from noisy or incomplete retrieval, while GAG is vulnerable to hallucinated or inaccurate information due to unconstrained generation. Both issues can mislead reasoning and undermine answer reliability. To address these challenges, we propose MedRGAG, a unified retrieval-generation augmented framework that seamlessly integrates external and parametric knowledge for medical QA. MedRGAG comprises two key modules: Knowledge-Guided Context Completion (KGCC), which directs the generator to produce background documents that complement the missing knowledge revealed by retrieval; and Knowledge-Aware Document Selection (KADS), which adaptively selects an optimal combination of retrieved and generated documents to form concise yet comprehensive evidence for answer generation. Extensive experiments on five medical QA benchmarks demonstrate that MedRGAG achieves a 12.5% improvement over MedRAG and a 4.5% gain over MedGENIE, highlighting the effectiveness of unifying retrieval and generation for knowledge-intensive reasoning. Our code and data are publicly available at https://anonymous.4open.science/r/MedRGAG
                </div>
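                 <p class="mt-2 text-xs text-gray-500">Editor's note: a rough sketch of the document-selection step only, not the paper's learned KADS module. The keyword-coverage score, the conciseness penalty, and the toy documents are editorial assumptions.</p>
                 <pre class="mt-1 p-3 bg-gray-100 rounded-md text-xs text-gray-700 overflow-x-auto"><code>
# Illustrative sketch only: pick a small evidence set from retrieved plus generated docs.
from itertools import combinations

question = "which drug class is first-line therapy for type 2 diabetes"
retrieved = ["Metformin is recommended as initial pharmacologic therapy for type 2 diabetes."]
generated = [
    "Sulfonylureas stimulate insulin secretion and are usually second-line agents.",
    "Metformin, a biguanide, lowers hepatic glucose production.",
]

def coverage(docs):
    # Crude relevance proxy: how many question terms the evidence set mentions.
    terms = set(question.split())
    text = " ".join(docs).lower()
    return sum(1 for t in terms if t in text)

candidates = retrieved + generated
best = max(
    (c for r in (1, 2) for c in combinations(candidates, r)),
    key=lambda docs: coverage(docs) - 0.1 * len(docs),  # prefer concise evidence
)
print(best)
</code></pre>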
            </details>
    </div>
</div>
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.18214v1" target="_blank" rel="noopener noreferrer">
                VLSU：为AI安全绘制联合多模态理解的极限边界
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>2/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            VLSU: Mapping the Limits of Joint Multimodal Understanding for AI Safety
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Shruti Palaskar, Leon Gatys, Mona Abdelrahman, Mar Jacobo, Larry Lindsey, Rutika...
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要关注AI安全领域的多模态理解极限问题，这属于安全/伦理范畴，属于明确的无关主题。虽然提到了多模态理解，但核心焦点是安全评估而非推荐系统、搜索或广告的技术进步。在推荐/搜索场景中，多模态安全评估可能有一定间接关联，但不符合当前对核心技术进展的关注重点。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-10-21 01:30:31
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2510.18214v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2510.18214v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CV</span><span class="category-tag">cs.AI</span><span class="category-tag">cs.CL</span><span class="category-tag">cs.LG</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Safety evaluation of multimodal foundation models often treats vision and language inputs separately, missing risks from joint interpretation where benign content becomes harmful in combination. Existing approaches also fail to distinguish clearly unsafe content from borderline cases, leading to problematic over-blocking or under-refusal of genuinely harmful content. We present Vision Language Safety Understanding (VLSU), a comprehensive framework to systematically evaluate multimodal safety through fine-grained severity classification and combinatorial analysis across 17 distinct safety patterns. Using a multi-stage pipeline with real-world images and human annotation, we construct a large-scale benchmark of 8,187 samples spanning 15 harm categories. Our evaluation of eleven state-of-the-art models reveals systematic joint understanding failures: while models achieve 90%-plus accuracy on clear unimodal safety signals, performance degrades substantially to 20-55% when joint image-text reasoning is required to determine the safety label. Most critically, 34% of errors in joint image-text safety classification occur despite correct classification of the individual modalities, further demonstrating absent compositional reasoning capabilities. Additionally, we find that models struggle to balance refusing unsafe content while still responding to borderline cases that deserve engagement. For example, we find that instruction framing can reduce the over-blocking rate on borderline content from 62.4% to 10.4% in Gemini-1.5, but only at the cost of under-refusing on unsafe content with refusal rate dropping from 90.8% to 53.9%. Overall, our framework exposes weaknesses in joint image-text understanding and alignment gaps in current models, and provides a critical test bed to enable the next milestones in research on robust vision-language safety.
                </div>
            </details>
    </div>
</div>
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.18201v1" target="_blank" rel="noopener noreferrer">
                MARCUS：一种基于事件中心的自然语言处理流程，可从叙事文本中生成角色发展轨迹
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>2/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            MARCUS: An Event-Centric NLP Pipeline that generates Character Arcs from Narratives
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Sriharsh Bhyravajjula, Ujwal Narayan, Manish Shrivastava
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文聚焦于从叙事文本中提取角色发展轨迹的NLP流程，这属于纯粹的文本分析应用。虽然涉及序列建模，但主要针对文学分析领域，与推荐系统、搜索或广告的核心技术需求缺乏直接关联。其事件提取和角色轨迹建模技术可能对用户行为序列分析有微弱启发，但应用场景差异过大。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-10-21 01:03:48
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2510.18201v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2510.18201v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CL</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Character arcs are important theoretical devices employed in literary studies to understand character journeys, identify tropes across literary genres, and establish similarities between narratives. This work addresses the novel task of computationally generating event-centric, relation-based character arcs from narratives. Providing a quantitative representation for arcs brings tangibility to a theoretical concept and paves the way for subsequent applications. We present MARCUS (Modelling Arcs for Understanding Stories), an NLP pipeline that extracts events, participant characters, implied emotion, and sentiment to model inter-character relations. MARCUS tracks and aggregates these relations across the narrative to generate character arcs as graphical plots. We generate character arcs from two extended fantasy series, Harry Potter and Lord of the Rings. We evaluate our approach before outlining existing challenges, suggesting applications of our pipeline, and discussing future work.
                </div>
            </details>
    </div>
</div>
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.18196v1" target="_blank" rel="noopener noreferrer">
                对比解码缓解了LLM作为评判者时的分数范围偏差
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>2/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            Contrastive Decoding Mitigates Score Range Bias in LLM-as-a-Judge
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Yoshinari Fujinuma
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要关注LLM评估中的评分偏差问题，属于纯粹的LLM评估和基准测试范畴，与推荐系统、搜索或广告的核心技术进展无关。虽然提到了对比解码技术，但应用场景仅限于改善LLM作为评判者的表现，没有展示在RecSys/Search/Ads领域的潜在应用价值。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-10-21 00:47:11
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2510.18196v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2510.18196v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CL</span><span class="category-tag">cs.AI</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Large Language Models (LLMs) are commonly used as evaluators in various applications, but the reliability of the outcomes remains a challenge. One such challenge is using LLMs-as-judges for direct assessment, i.e., assigning scores from a specified range without any references. We first show that this challenge stems from LLM judge outputs being associated with score range bias, i.e., LLM judge outputs are highly sensitive to pre-defined score ranges, preventing the search for optimal score ranges. We also show that similar biases exist among models from the same family. We then mitigate this bias through contrastive decoding, achieving up to 11.3% relative improvement on average in Spearman correlation with human judgments across different score ranges.
                </div>
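                 <p class="mt-2 text-xs text-gray-500">Editor's note: a toy numeric sketch of one common form of contrastive decoding (an expert model's log-probabilities penalised by a weaker model's), not necessarily the paper's exact contrast pair. The logits and alpha are made up.</p>
                 <pre class="mt-1 p-3 bg-gray-100 rounded-md text-xs text-gray-700 overflow-x-auto"><code>
# Illustrative sketch only: contrastive decoding over the tokens of a 1-10 score range.
import numpy as np

score_tokens = list(range(1, 11))
expert_logits = np.array([0.1, 0.2, 0.4, 0.9, 1.6, 2.1, 2.6, 2.4, 1.0, 0.3])
amateur_logits = np.array([0.0, 0.1, 0.2, 0.5, 1.4, 2.0, 2.7, 2.5, 0.9, 0.2])

def log_softmax(x):
    z = x - x.max()
    return z - np.log(np.exp(z).sum())

alpha = 1.0  # strength of the contrastive penalty
contrastive = log_softmax(expert_logits) - alpha * log_softmax(amateur_logits)

print("plain argmax score:", score_tokens[int(np.argmax(expert_logits))])
print("contrastive argmax score:", score_tokens[int(np.argmax(contrastive))])
</code></pre>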
            </details>
    </div>
</div>
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.18873v1" target="_blank" rel="noopener noreferrer">
                DSI-Bench：动态空间智能基准测试
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>2/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            DSI-Bench: A Benchmark for Dynamic Spatial Intelligence
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Ziang Zhang, Zehan Wang, Guanghao Zhang, Weilong Dai, Yan Xia, Ziang Yan, Minjie...
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文标题表明这是一个关于动态空间智能的基准测试，主要涉及空间推理和动态环境理解能力评估。虽然空间智能在某些搜索场景中可能有间接应用（如地图搜索、位置推荐），但这与当前关注的核心推荐系统、搜索广告、LLM技术或Transformer架构进展的直接关联性较弱。该基准更偏向通用的空间认知能力评估，而非专门针对RecSys/Search/Ads领域的核心问题。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-10-21 17:59:36
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2510.18873v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2510.18873v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Reasoning about dynamic spatial relationships is essential, as both observers and objects often move simultaneously. Although vision-language models (VLMs) and visual expertise models excel in 2D tasks and static scenarios, their ability to fully understand dynamic 3D scenarios remains limited. We introduce Dynamic Spatial Intelligence and propose DSI-Bench, a benchmark with nearly 1,000 dynamic videos and over 1,700 manually annotated questions covering nine decoupled motion patterns of observers and objects. Spatially and temporally symmetric designs reduce biases and enable systematic evaluation of models' reasoning about self-motion and object motion. Our evaluation of 14 VLMs and expert models reveals key limitations: models often conflate observer and object motion, exhibit semantic biases, and fail to accurately infer relative relationships in dynamic scenarios. Our DSI-Bench provides valuable findings and insights about the future development of general and expertise models with dynamic spatial intelligence.
                </div>
            </details>
    </div>
</div>
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.18851v1" target="_blank" rel="noopener noreferrer">
                DP²O-SR：面向真实世界图像超分辨率的直接感知偏好优化
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>2/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            DP²O-SR: Direct Perceptual Preference Optimization for Real-World Image Super-Resolution
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Rongyuan Wu, Lingchen Sun, Zhengqiang Zhang, Shihao Wang, Tianhe Wu, Qiaosi Yi, ...
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于计算机视觉领域的图像超分辨率技术，虽然涉及优化方法，但缺乏与推荐系统、搜索或广告领域的明确联系。论文的核心技术（直接感知偏好优化）主要针对图像质量提升，在当前聚焦的领域中难以找到直接的应用场景。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-10-21 17:43:23
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2510.18851v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2510.18851v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CV</span><span class="category-tag">cs.AI</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Benefiting from pre-trained text-to-image (T2I) diffusion models, real-world image super-resolution (Real-ISR) methods can synthesize rich and realistic details. However, due to the inherent stochasticity of T2I models, different noise inputs often lead to outputs with varying perceptual quality. Although this randomness is sometimes seen as a limitation, it also introduces a wider perceptual quality range, which can be exploited to improve Real-ISR performance. To this end, we introduce Direct Perceptual Preference Optimization for Real-ISR (DP²O-SR), a framework that aligns generative models with perceptual preferences without requiring costly human annotations. We construct a hybrid reward signal by combining full-reference and no-reference image quality assessment (IQA) models trained on large-scale human preference datasets. This reward encourages both structural fidelity and natural appearance. To better utilize perceptual diversity, we move beyond the standard best-vs-worst selection and construct multiple preference pairs from outputs of the same model. Our analysis reveals that the optimal selection ratio depends on model capacity: smaller models benefit from broader coverage, while larger models respond better to stronger contrast in supervision. Furthermore, we propose hierarchical preference optimization, which adaptively weights training pairs based on intra-group reward gaps and inter-group diversity, enabling more efficient and stable learning. Extensive experiments across both diffusion- and flow-based T2I backbones demonstrate that DP²O-SR significantly improves perceptual quality and generalizes well to real-world benchmarks.
                </div>
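                 <p class="mt-2 text-xs text-gray-500">Editor's note: a sketch of the preference-pair construction idea only, not the authors' method. The reward values, the ratio parameter, and the gap-based weighting are illustrative assumptions.</p>
                 <pre class="mt-1 p-3 bg-gray-100 rounded-md text-xs text-gray-700 overflow-x-auto"><code>
# Illustrative sketch only: form multiple preference pairs from several outputs
# of the same model, scored by a hybrid perceptual reward (made-up numbers).
rewards = {"out_a": 0.82, "out_b": 0.74, "out_c": 0.61, "out_d": 0.40}

def preference_pairs(rewards, ratio=0.5):
    ranked = sorted(rewards, key=rewards.get, reverse=True)
    n_pref = max(1, int(len(ranked) * ratio))  # how many samples count as "preferred"
    preferred, rejected = ranked[:n_pref], ranked[n_pref:]
    pairs = [(w, l) for w in preferred for l in rejected]
    # Weight each pair by its reward gap, a stand-in for hierarchical weighting.
    return [(w, l, round(rewards[w] - rewards[l], 2)) for w, l in pairs]

for winner, loser, gap in preference_pairs(rewards):
    print(winner, "preferred over", loser, "with gap", gap)
</code></pre>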
            </details>
    </div>
</div>
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.18822v1" target="_blank" rel="noopener noreferrer">
                SAM 2++：以任意粒度追踪任何目标
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>2/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            SAM 2++: Tracking Anything at Any Granularity
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Jiaming Zhang, Cheng Liang, Yichun Yang, Chenkai Zeng, Yutao Cui, Xinwen Zhang, ...
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于计算机视觉中的通用目标跟踪技术，属于纯粹的视觉任务，与推荐系统、搜索或广告的核心技术栈没有直接关联。虽然跟踪技术在某些边缘场景可能有辅助作用（如用户行为分析），但这不属于当前关注的核心领域进展或使能技术范畴。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-10-21 17:20:15
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2510.18822v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2510.18822v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Video tracking aims at finding the specific target in subsequent frames given its initial state. Due to the varying granularity of target states across different tasks, most existing trackers are tailored to a single task and heavily rely on custom-designed modules within the individual task, which limits their generalization and leads to redundancy in both model design and parameters. To unify video tracking tasks, we present SAM 2++, a unified model towards tracking at any granularity, including masks, boxes, and points. First, to extend target granularity, we design task-specific prompts to encode various task inputs into general prompt embeddings, and a unified decoder to unify diverse task results into a unified form pre-output. Next, to satisfy memory matching, the core operation of tracking, we introduce a task-adaptive memory mechanism that unifies memory across different granularities. Finally, we introduce a customized data engine to support tracking training at any granularity, producing a large and diverse video tracking dataset with rich annotations at three granularities, termed Tracking-Any-Granularity, which represents a comprehensive resource for training and benchmarking on unified tracking. Comprehensive experiments on multiple benchmarks confirm that SAM 2++ sets a new state of the art across diverse tracking tasks at different granularities, establishing a unified and robust tracking framework.
                </div>
            </details>
    </div>
</div>
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.18813v1" target="_blank" rel="noopener noreferrer">
                一种几何方法导向的可操纵卷积
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>2/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            A Geometric Approach to Steerable Convolutions
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Soumyabrata Kundu, Risi Kondor
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于计算机视觉领域的可操纵卷积技术，属于几何深度学习的范畴。虽然卷积神经网络在推荐系统中用于序列建模有一定应用，但该论文的几何方法和可操纵卷积主要针对视觉数据的对称性处理，与推荐/搜索/广告的核心技术栈关联度较低，且未明确展示在异构数据建模或Transformer架构方面的直接应用潜力。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-10-21 17:10:48
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2510.18813v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2510.18813v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     View full abstract <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    In contrast to the somewhat abstract, group theoretical approach adopted by many papers, our work provides a new and more intuitive derivation of steerable convolutional neural networks in $d$ dimensions. This derivation is based on geometric arguments and fundamental principles of pattern matching. We offer an intuitive explanation for the appearance of the Clebsch--Gordan decomposition and spherical harmonic basis functions. Furthermore, we suggest a novel way to construct steerable convolution layers using interpolation kernels that improve upon existing implementation, and offer greater robustness to noisy data.
                </div>
            </details>
    </div>
</div>
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.18751v1" target="_blank" rel="noopener noreferrer">
                Seg the HAB: Language-Guided Geospatial Algae Bloom Reasoning and Segmentation
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>2/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            Seg the HAB: Language-Guided Geospatial Algae Bloom Reasoning and Segmentation
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Patterson Hsieh, Jerry Yeh, Mao-Chi He, Wen-Han Hsieh, Elvis Hsieh
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要关注地理空间藻华分割的计算机视觉任务，虽然使用了语言引导技术，但核心应用领域是环境监测和遥感，与推荐系统、搜索或广告没有明显关联。语言引导的分割技术可能在多模态推荐中有潜在应用，但论文的具体应用场景过于专业化且远离目标领域。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-10-21 15:59:00
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2510.18751v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2510.18751v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.AI</span><span class="category-tag">cs.CV</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     View full abstract <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Climate change is intensifying the occurrence of harmful algal bloom (HAB), particularly cyanobacteria, which threaten aquatic ecosystems and human health through oxygen depletion, toxin release, and disruption of marine biodiversity. Traditional monitoring approaches, such as manual water sampling, remain labor-intensive and limited in spatial and temporal coverage. Recent advances in vision-language models (VLMs) for remote sensing have shown potential for scalable AI-driven solutions, yet challenges remain in reasoning over imagery and quantifying bloom severity. In this work, we introduce ALGae Observation and Segmentation (ALGOS), a segmentation-and-reasoning system for HAB monitoring that combines remote sensing image understanding with severity estimation. Our approach integrates GeoSAM-assisted human evaluation for high-quality segmentation mask curation and fine-tunes vision language model on severity prediction using the Cyanobacteria Aggregated Manual Labels (CAML) from NASA. Experiments demonstrate that ALGOS achieves robust performance on both segmentation and severity-level estimation, paving the way toward practical and automated cyanobacterial monitoring systems.
                </div>
            </details>
    </div>
</div>
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.18740v1" target="_blank" rel="noopener noreferrer">
                SEAL: Semantic-Aware Hierarchical Learning for Generalized Category Discovery
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>2/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            SEAL: Semantic-Aware Hierarchical Learning for Generalized Category Discovery
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Zhenqi He, Yuanpei Liu, Kai Han
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要关注广义类别发现，这是一个计算机视觉和模式识别领域的问题，涉及在未标记数据中发现新类别。虽然语义感知学习可能有一些概念上的联系，但该工作缺乏明确的推荐系统、搜索或广告应用场景，也没有涉及LLM或Transformer架构的进展。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-10-21 15:44:47
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2510.18740v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2510.18740v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     View full abstract <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    This paper investigates the problem of Generalized Category Discovery (GCD). Given a partially labelled dataset, GCD aims to categorize all unlabelled images, regardless of whether they belong to known or unknown classes. Existing approaches typically depend on either single-level semantics or manually designed abstract hierarchies, which limit their generalizability and scalability. To address these limitations, we introduce a SEmantic-aware hierArchical Learning framework (SEAL), guided by naturally occurring and easily accessible hierarchical structures. Within SEAL, we propose a Hierarchical Semantic-Guided Soft Contrastive Learning approach that exploits hierarchical similarity to generate informative soft negatives, addressing the limitations of conventional contrastive losses that treat all negatives equally. Furthermore, a Cross-Granularity Consistency (CGC) module is designed to align the predictions from different levels of granularity. SEAL consistently achieves state-of-the-art performance on fine-grained benchmarks, including the SSB benchmark, Oxford-Pet, and the Herbarium19 dataset, and further demonstrates generalization on coarse-grained datasets. Project page: https://visual-ai.github.io/seal/
                </div>
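                 <div class="mt-3 text-sm text-gray-600">
                     <p>The "soft negatives from hierarchical similarity" idea can be made concrete with a small sketch: instead of a one-hot contrastive target, the target distribution spreads mass over other samples according to how close they sit in a label hierarchy. The weighting below (1.0 same class, 0.5 same parent) and the loss form are our assumptions, not SEAL's exact objective.</p>
                     <pre class="mt-2 p-3 bg-gray-50 rounded-md text-xs text-gray-700 overflow-x-auto"><code># Hierarchy-weighted soft contrastive loss (illustrative only).
import numpy as np

def soft_contrastive_loss(z, hier_sim, tau=0.1):
    """z: (N, D) L2-normalized embeddings; hier_sim: (N, N) similarity under a label hierarchy."""
    logits = z @ z.T / tau
    np.fill_diagonal(logits, -np.inf)              # never contrast a sample with itself
    log_p = logits - np.log(np.exp(logits).sum(axis=1, keepdims=True))
    np.fill_diagonal(log_p, 0.0)                   # the diagonal carries no target mass anyway
    targets = hier_sim.copy()
    np.fill_diagonal(targets, 0.0)
    targets /= targets.sum(axis=1, keepdims=True)  # soft targets instead of one hard positive
    return float(-(targets * log_p).sum(axis=1).mean())

rng = np.random.default_rng(0)
z = rng.normal(size=(8, 32)); z /= np.linalg.norm(z, axis=1, keepdims=True)
labels = np.array([0, 0, 1, 1, 2, 2, 3, 3]); parents = labels // 2
hier_sim = 0.5 * (labels[:, None] == labels[None, :]) + 0.5 * (parents[:, None] == parents[None, :])
print(soft_contrastive_loss(z, hier_sim))          # lower when embeddings respect the hierarchy</code></pre>
                 </div>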
            </details>
    </div>
</div>
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.18726v1" target="_blank" rel="noopener noreferrer">
                IF-VidCap: Can Video Caption Models Follow Instructions?
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>2/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            IF-VidCap: Can Video Caption Models Follow Instructions?
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Shihao Li, Yuanxing Zhang, Jiangtao Wu, Zhide Lei, Yiwen He, Runzhe Wen, Chenxi ...
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要研究视频字幕模型的指令跟随能力，属于计算机视觉与自然语言处理的交叉领域。虽然指令跟随是LLM的重要能力，但该工作聚焦于视频内容描述这一特定任务，与推荐系统、搜索或广告的核心技术关联度较低，缺乏明确的RecSys/Search/Ads应用场景。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-10-21 15:25:08
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2510.18726v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2510.18726v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     View full abstract <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Although Multimodal Large Language Models (MLLMs) have demonstrated proficiency in video captioning, practical applications require captions that follow specific user instructions rather than generating exhaustive, unconstrained descriptions. Current benchmarks, however, primarily assess descriptive comprehensiveness while largely overlooking instruction-following capabilities. To address this gap, we introduce IF-VidCap, a new benchmark for evaluating controllable video captioning, which contains 1,400 high-quality samples. Distinct from existing video captioning or general instruction-following benchmarks, IF-VidCap incorporates a systematic framework that assesses captions on two dimensions: format correctness and content correctness. Our comprehensive evaluation of over 20 prominent models reveals a nuanced landscape: despite the continued dominance of proprietary models, the performance gap is closing, with top-tier open-source solutions now achieving near-parity. Furthermore, we find that models specialized for dense captioning underperform general-purpose MLLMs on complex instructions, indicating that future work should simultaneously advance both descriptive richness and instruction-following fidelity.
                </div>
            </details>
    </div>
</div>
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.18716v1" target="_blank" rel="noopener noreferrer">
                SSD: Spatial-Semantic Head Decoupling for Efficient Autoregressive Image Generation
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>2/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            SSD: Spatial-Semantic Head Decoupling for Efficient Autoregressive Image Generation
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Siyong Jian, Huan Wang
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要关注图像生成的自回归模型效率优化，属于纯粹的视觉生成领域。虽然提到了架构效率改进，但缺乏明确的推荐系统、搜索或广告应用场景，与当前关注的RecSys/Search/Ads核心领域相关性较弱。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-10-21 15:17:37
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2510.18716v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2510.18716v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     View full abstract <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Autoregressive image generation models like Janus-Pro produce high-quality images, but at the significant cost of high memory and ever-growing computational demands due to the large number of visual tokens. While KV cache compression has been extensively studied in language modeling, it still remains largely unexplored for the image generation domain. In this work, we begin by identifying a distinct and prominent attention phenomenon, which we term spatial locality and emergent semantic sink. To leverage this key insight, we introduce a novel KV cache compression framework. Specifically, we compress the KV cache for all visual tokens by adaptively decoupling attention heads into two separate types: for spatial-locality heads, our method maintains a short recent token window; for semantic-sink heads, it strategically preserves a compact set of highly-attended tokens. Our extensive experiments demonstrate that the proposed method achieves a 5$\times$ reduction in memory usage and a notable 6.6$\times$ speedup in overall throughput with only minimal visual quality loss, thereby enabling highly efficient native autoregressive image generation on resource-constrained hardware.
                </div>
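                 <div class="mt-3 text-sm text-gray-600">
                     <p>The cache policy described above is simple to illustrate: spatial-locality heads keep only a recent window of the KV cache, while semantic-sink heads keep the most-attended tokens. The sketch below shows that per-head rule with made-up window and budget sizes; the paper's head classification and actual settings differ.</p>
                     <pre class="mt-2 p-3 bg-gray-50 rounded-md text-xs text-gray-700 overflow-x-auto"><code># Per-head KV-cache compression rule (illustrative; not the SSD implementation).
import numpy as np

def compress_kv(keys, values, attn_scores, head_type, window=64, budget=64):
    """keys/values: (T, d) cache of one head; attn_scores: (T,) how much recent queries
    attended to each cached token. Returns the compressed (keys, values)."""
    T = keys.shape[0]
    if head_type == "spatial":                        # spatial locality: recent tokens dominate
        keep = np.arange(max(0, T - window), T)
    else:                                             # semantic sink: a few tokens absorb attention
        keep = np.sort(np.argsort(attn_scores)[-budget:])
    return keys[keep], values[keep]

rng = np.random.default_rng(0)
T, d = 1024, 64
k, v, s = rng.normal(size=(T, d)), rng.normal(size=(T, d)), rng.random(T)
for head_type in ("spatial", "sink"):
    ck, cv = compress_kv(k, v, s, head_type)
    print(head_type, ck.shape)                        # both caches shrink from 1024 to 64 entries</code></pre>
                 </div>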
            </details>
    </div>
</div>
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.18705v1" target="_blank" rel="noopener noreferrer">
                A Renaissance of Explicit Motion Information Mining from Transformers for Action Recognition
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>2/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            A Renaissance of Explicit Motion Information Mining from Transformers for Action Recognition
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Peiqin Zhuang, Lei Bai, Yichao Wu, Ding Liang, Luping Zhou, Yali Wang, Wanli Ouy...
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于计算机视觉领域的动作识别，主要涉及视频理解和运动分析，与推荐系统、搜索或广告的核心技术关联性较弱。虽然Transformer架构是相关技术，但该研究主要针对视觉模态的运动建模，缺乏明确的推荐/搜索/广告应用潜力。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-10-21 15:01:48
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2510.18705v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2510.18705v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     View full abstract <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Recently, action recognition has been dominated by transformer-based methods, thanks to their spatiotemporal contextual aggregation capacities. However, despite the significant progress achieved on scene-related datasets, they do not perform well on motion-sensitive datasets due to the lack of elaborate motion modeling designs. Meanwhile, we observe that the widely-used cost volume in traditional action recognition is highly similar to the affinity matrix defined in self-attention, but equipped with powerful motion modeling capacities. In light of this, we propose to integrate those effective motion modeling properties into the existing transformer in a unified and neat way, with the proposal of the Explicit Motion Information Mining module (EMIM). In EMIM, we propose to construct the desirable affinity matrix in a cost volume style, where the set of key candidate tokens is sampled from the query-based neighboring area in the next frame in a sliding-window manner. Then, the constructed affinity matrix is used to aggregate contextual information for appearance modeling and is converted into motion features for motion modeling as well. We validate the motion modeling capacities of our method on four widely-used datasets, and our method performs better than existing state-of-the-art approaches, especially on motion-sensitive datasets, i.e., Something-Something V1 & V2.
                </div>
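                 <div class="mt-3 text-sm text-gray-600">
                     <p>A toy version of the cost-volume-style affinity described above: each query position scores only keys inside a small window around the same position in the next frame, and the resulting local distribution doubles as a motion cue (expected displacement). The window size and the motion-feature conversion here are simplified assumptions, not EMIM's design.</p>
                     <pre class="mt-2 p-3 bg-gray-50 rounded-md text-xs text-gray-700 overflow-x-auto"><code># Sliding-window affinity between frame t and frame t+1 (illustrative sketch).
import numpy as np

def local_affinity(feat_t, feat_t1, radius=2):
    """feat_t, feat_t1: (H, W, C). Returns affinity (H, W, (2r+1)**2) and an expected
    displacement field (H, W, 2) derived from it."""
    H, W, C = feat_t.shape
    pad = np.pad(feat_t1, ((radius, radius), (radius, radius), (0, 0)))
    offsets = [(dy, dx) for dy in range(-radius, radius + 1) for dx in range(-radius, radius + 1)]
    aff = np.zeros((H, W, len(offsets)))
    for k, (dy, dx) in enumerate(offsets):
        shifted = pad[radius + dy: radius + dy + H, radius + dx: radius + dx + W]
        aff[:, :, k] = (feat_t * shifted).sum(axis=-1)       # dot-product affinity per offset
    aff = np.exp(aff - aff.max(axis=-1, keepdims=True))
    aff /= aff.sum(axis=-1, keepdims=True)                   # softmax over the local window
    disp = aff @ np.array(offsets, dtype=float)              # expected (dy, dx) as a motion feature
    return aff, disp

rng = np.random.default_rng(0)
aff, disp = local_affinity(rng.normal(size=(16, 16, 8)), rng.normal(size=(16, 16, 8)))
print(aff.shape, disp.shape)                                 # (16, 16, 25) (16, 16, 2)</code></pre>
                 </div>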
            </details>
    </div>
</div>
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.18692v1" target="_blank" rel="noopener noreferrer">
                MoGA: Mixture-of-Groups Attention for End-to-End Long Video Generation
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>2/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            MoGA: Mixture-of-Groups Attention for End-to-End Long Video Generation
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Weinan Jia, Yuning Lu, Mengqi Huang, Hualiang Wang, Binyuan Huang, Nan Chen, Mu ...
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要关注视频生成领域，虽然涉及注意力机制和MoE相关技术，但其核心应用场景是视频内容生成而非推荐系统、搜索或广告。长视频生成与排名、个性化推荐等核心业务场景关联度较低，且属于被排除的AIGC/内容生成类别。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-10-21 14:50:42
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2510.18692v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2510.18692v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     View full abstract <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Long video generation with Diffusion Transformers (DiTs) is bottlenecked by the quadratic scaling of full attention with sequence length. Since attention is highly redundant, outputs are dominated by a small subset of query-key pairs. Existing sparse methods rely on blockwise coarse estimation, whose accuracy-efficiency trade-offs are constrained by block size. This paper introduces Mixture-of-Groups Attention (MoGA), an efficient sparse attention that uses a lightweight, learnable token router to precisely match tokens without blockwise estimation. Through semantic-aware routing, MoGA enables effective long-range interactions. As a kernel-free method, MoGA integrates seamlessly with modern attention stacks, including FlashAttention and sequence parallelism. Building on MoGA, we develop an efficient long video generation model that end-to-end produces minute-level, multi-shot, 480p videos at 24 fps, with a context length of approximately 580k. Comprehensive experiments on various video generation tasks validate the effectiveness of our approach.
                </div>
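                 <div class="mt-3 text-sm text-gray-600">
                     <p>A schematic reading of mixture-of-groups attention: a small learnable router assigns each token to a group and full attention runs only within each group, so cost scales with group sizes rather than the whole sequence. The hard assignment and shapes below are our simplifications, not the MoGA kernel.</p>
                     <pre class="mt-2 p-3 bg-gray-50 rounded-md text-xs text-gray-700 overflow-x-auto"><code># Group-restricted attention with a learnable token router (illustrative sketch).
import numpy as np

def softmax(x, axis=-1):
    e = np.exp(x - x.max(axis=axis, keepdims=True))
    return e / e.sum(axis=axis, keepdims=True)

def mixture_of_groups_attention(x, W_router, n_groups):
    """x: (T, d) tokens; W_router: (d, n_groups), the lightweight router. Returns (T, d)."""
    group_id = softmax(x @ W_router).argmax(axis=-1)         # hard routing for the sketch
    out = np.zeros_like(x)
    for g in range(n_groups):
        idx = np.where(group_id == g)[0]
        if idx.size == 0:
            continue
        xg = x[idx]                                          # attend only within the group
        attn = softmax(xg @ xg.T / np.sqrt(x.shape[1]))
        out[idx] = attn @ xg
    return out

rng = np.random.default_rng(0)
x = rng.normal(size=(512, 64))
y = mixture_of_groups_attention(x, rng.normal(size=(64, 8)), n_groups=8)
print(y.shape)   # (512, 64); no full 512x512 attention map is ever formed</code></pre>
                 </div>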
            </details>
    </div>
</div>
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.18632v1" target="_blank" rel="noopener noreferrer">
                Think with 3D: Geometric Imagination Grounded Spatial Reasoning from Limited Views
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>2/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            Think with 3D: Geometric Imagination Grounded Spatial Reasoning from Limited Views
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Zhangquan Chen, Manyuan Zhang, Xinlei Yu, Xufang Luo, Mingze Sun, Zihao Pan, Yan...
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要关注3D视觉和空间推理，属于纯粹的计算机视觉领域，与推荐系统、搜索或广告的核心技术没有直接关联。虽然几何想象和推理能力在概念上可能启发多模态建模，但论文本身没有显示出在异构数据统一建模或推荐/搜索应用方面的明确潜力。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-10-21 13:36:58
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2510.18632v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2510.18632v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CV</span><span class="category-tag">cs.AI</span><span class="category-tag">I.2.10</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     View full abstract <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Though recent advances in vision-language models (VLMs) have achieved remarkable progress across a wide range of multimodal tasks, understanding 3D spatial relationships from limited views remains a significant challenge. Previous reasoning methods typically rely on pure text (e.g., topological cognitive maps) or on 2D visual cues. However, their limited representational capacity hinders performance in specific tasks that require 3D spatial imagination. To address this limitation, we propose 3DThinker, a framework that can effectively exploit the rich geometric information embedded within images while reasoning, like humans do. Our framework is the first to enable 3D mentaling during reasoning without any 3D prior input, and it does not rely on explicitly labeled 3D data for training. Specifically, our training consists of two stages. First, we perform supervised training to align the 3D latent generated by VLM while reasoning with that of a 3D foundation model (e.g., VGGT). Then, we optimize the entire reasoning trajectory solely based on outcome signals, thereby refining the underlying 3D mentaling. Extensive experiments across multiple benchmarks show that 3DThinker consistently outperforms strong baselines and offers a new perspective toward unifying 3D representations into multimodal reasoning. Our code will be available at https://github.com/zhangquanchen/3DThinker.
                </div>
            </details>
    </div>
</div>
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.18596v1" target="_blank" rel="noopener noreferrer">
                CUARewardBench: A Benchmark for Evaluating Reward Models on Computer-using Agent
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>2/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            CUARewardBench: A Benchmark for Evaluating Reward Models on Computer-using Agent
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Haojia Lin, Xiaoyu Tan, Yulei Qin, Zihan Xu, Yuchen Shi, Zongyi Li, Gang Li, Sha...
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文聚焦于评估奖励模型的基准测试，主要涉及强化学习中的奖励建模问题。虽然奖励模型在理论上有潜在应用，但该论文专注于计算机使用智能体的通用评估，没有明确展示与推荐系统、搜索或广告领域的直接关联或具体应用场景。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-10-21 12:53:40
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2510.18596v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2510.18596v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.SE</span><span class="category-tag">cs.CV</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     View full abstract <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Computer-using agents (CUAs) enable task completion through natural interaction with operating systems and software interfaces. While script-based verifiers are widely adopted for evaluation, they suffer from limited scalability and inability to provide step-wise assessment. Reward models offer promising alternatives, but their effectiveness on CUA evaluation remains largely underexplored. To address this gap, we present CUARewardBench, comprising four key contributions: (1) First-ever Comprehensive CUA Reward Benchmark: We introduce the first benchmark for evaluating both outcome reward models (ORM) and process reward models (PRM) on CUA tasks, enabling systematic assessment across trajectory-level and step-level evaluation. (2) Diverse, Practical and Reliable Dataset: CUARewardBench encompasses trajectories from 10 software categories and 7 agent architectures with varying performance levels (25.9%-50.8% success rates). All trajectories are expertly annotated through carefully designed protocols, with rigorous quality control to ensure reliability and practical applicability. (3) Comprehensive Analysis and Insights: Through extensive experiments across 7 vision-language models and 3 prompt templates, we reveal critical limitations of current CUA RMs, including insufficient visual reasoning capabilities, knowledge deficiencies, and the superiority of general VLMs over specialized CUA models for reward evaluation. (4) Unanimous Prompt Ensemble (UPE): Based on the insights from our comprehensive analysis, we propose UPE, a novel ensemble method that significantly enhances reward model reliability through strict unanimous voting and strategic prompt-template configurations. UPE achieves 89.8% precision and 93.3% NPV for ORM, and 81.7% precision and 85.1% NPV for PRM, substantially outperforming single VLMs and traditional ensemble approaches.
                </div>
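                 <div class="mt-3 text-sm text-gray-600">
                     <p>The Unanimous Prompt Ensemble rule itself is easy to state: a trajectory is judged successful only if every judge/prompt-template configuration agrees. The sketch below shows just that voting rule; the judge names, template names and verdicts are fabricated placeholders, and which configurations to combine is what the paper studies.</p>
                     <pre class="mt-2 p-3 bg-gray-50 rounded-md text-xs text-gray-700 overflow-x-auto"><code># Strict unanimous voting over judge/prompt-template verdicts (illustrative sketch).
from itertools import product

def unanimous_vote(verdicts):
    """verdicts: iterable of booleans from different judge/template configurations."""
    verdicts = list(verdicts)
    return len(verdicts) > 0 and all(verdicts)

judges = ["vlm_a", "vlm_b"]                      # hypothetical judge models
templates = ["strict", "stepwise", "rubric"]     # hypothetical prompt templates
# Pretend each (judge, template) pair produced a pass/fail judgement for one trajectory:
verdicts = {cfg: (cfg != ("vlm_b", "rubric")) for cfg in product(judges, templates)}
print(unanimous_vote(verdicts.values()))         # False: one dissenting verdict blocks "success"</code></pre>
                 </div>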
            </details>
    </div>
</div>
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.18489v1" target="_blank" rel="noopener noreferrer">
                Mono4DGS-HDR: High Dynamic Range 4D Gaussian Splatting from Alternating-exposure Monocular Videos
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>2/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            Mono4DGS-HDR: High Dynamic Range 4D Gaussian Splatting from Alternating-exposure Monocular Videos
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Jinfeng Liu, Lingtong Kong, Mi Zhou, Jinwen Chen, Dan Xu
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于4D高斯溅射和HDR重建的计算机视觉技术，属于纯粹的视觉领域研究。虽然高斯溅射在3D表示方面有进展，但该工作主要针对单目视频的HDR重建，与推荐系统、搜索或广告的核心技术没有直接关联，也没有明确的Transformer架构或LLM应用潜力。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-10-21 10:14:33
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2510.18489v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2510.18489v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     View full abstract <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    We introduce Mono4DGS-HDR, the first system for reconstructing renderable 4D high dynamic range (HDR) scenes from unposed monocular low dynamic range (LDR) videos captured with alternating exposures. To tackle such a challenging problem, we present a unified framework with two-stage optimization approach based on Gaussian Splatting. The first stage learns a video HDR Gaussian representation in orthographic camera coordinate space, eliminating the need for camera poses and enabling robust initial HDR video reconstruction. The second stage transforms video Gaussians into world space and jointly refines the world Gaussians with camera poses. Furthermore, we propose a temporal luminance regularization strategy to enhance the temporal consistency of the HDR appearance. Since our task has not been studied before, we construct a new evaluation benchmark using publicly available datasets for HDR video reconstruction. Extensive experiments demonstrate that Mono4DGS-HDR significantly outperforms alternative solutions adapted from state-of-the-art methods in both rendering quality and speed.
                </div>
            </details>
    </div>
</div>
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.18457v1" target="_blank" rel="noopener noreferrer">
                Vision Foundation Models Can Be Good Tokenizers for Latent Diffusion Models
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>2/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            Vision Foundation Models Can Be Good Tokenizers for Latent Diffusion Models
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Tianci Bi, Xiaoyi Zhang, Yan Lu, Nanning Zheng
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要关注视觉基础模型在扩散模型中的应用，属于视觉生成领域。虽然提到了基础模型和潜在表示，但缺乏与推荐系统、搜索或广告的直接关联。视觉生成技术可能间接影响广告创意生成，但这属于明确排除的非排名广告主题。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-10-21 09:30:45
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2510.18457v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2510.18457v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CV</span><span class="category-tag">cs.LG</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     View full abstract <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    The performance of Latent Diffusion Models (LDMs) is critically dependent on the quality of their visual tokenizer. While recent works have explored incorporating Vision Foundation Models (VFMs) via distillation, we identify a fundamental flaw in this approach: it inevitably weakens the robustness of alignment with the original VFM, causing the aligned latents to deviate semantically under distribution shifts. In this paper, we bypass distillation by proposing a more direct approach: Vision Foundation Model Variational Autoencoder (VFM-VAE). To resolve the inherent tension between the VFM's semantic focus and the need for pixel-level fidelity, we redesign the VFM-VAE decoder with Multi-Scale Latent Fusion and Progressive Resolution Reconstruction blocks, enabling high-quality reconstruction from spatially coarse VFM features. Furthermore, we provide a comprehensive analysis of representation dynamics during diffusion training, introducing the proposed SE-CKNNA metric as a more precise tool for this diagnosis. This analysis allows us to develop a joint tokenizer-diffusion alignment strategy that dramatically accelerates convergence. Our innovations in tokenizer design and training strategy lead to superior performance and efficiency: our system reaches a gFID (w/o CFG) of 2.20 in merely 80 epochs (a 10x speedup over prior tokenizers). With continued training to 640 epochs, it further attains a gFID (w/o CFG) of 1.62, establishing direct VFM integration as a superior paradigm for LDMs.
                </div>
            </details>
    </div>
</div>
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.18437v1" target="_blank" rel="noopener noreferrer">
                Beyond Single Images: Retrieval Self-Augmented Unsupervised Camouflaged Object Detection
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>2/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            Beyond Single Images: Retrieval Self-Augmented Unsupervised Camouflaged Object Detection
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Ji Du, Xin Wang, Fangwei Hao, Mingyang Yu, Chunyuan Chen, Jiesheng Wu, Bin Wang,...
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要关注计算机视觉中的伪装目标检测任务，虽然涉及检索技术，但核心是视觉检测问题而非推荐系统、搜索或广告应用。检索自增强方法可能对多模态建模有所启发，但与当前关注的LLM技术、Transformer架构进展或推荐系统核心算法缺乏直接关联。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-10-21 09:12:26
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2510.18437v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2510.18437v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     View full abstract <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    At the core of Camouflaged Object Detection (COD) lies segmenting objects from their highly similar surroundings. Previous efforts navigate this challenge primarily through image-level modeling or annotation-based optimization. Despite advancing considerably, this commonplace practice hardly taps valuable dataset-level contextual information or relies on laborious annotations. In this paper, we propose RISE, a RetrIeval SElf-augmented paradigm that exploits the entire training dataset to generate pseudo-labels for single images, which could be used to train COD models. RISE begins by constructing prototype libraries for environments and camouflaged objects using training images (without ground truth), followed by K-Nearest Neighbor (KNN) retrieval to generate pseudo-masks for each image based on these libraries. It is important to recognize that using only training images without annotations exerts a pronounced challenge in crafting high-quality prototype libraries. In this light, we introduce a Clustering-then-Retrieval (CR) strategy, where coarse masks are first generated through clustering, facilitating subsequent histogram-based image filtering and cross-category retrieval to produce high-confidence prototypes. In the KNN retrieval stage, to alleviate the effect of artifacts in feature maps, we propose Multi-View KNN Retrieval (MVKR), which integrates retrieval results from diverse views to produce more robust and precise pseudo-masks. Extensive experiments demonstrate that RISE outperforms state-of-the-art unsupervised and prompt-based methods. Code is available at https://github.com/xiaohainku/RISE.
                </div>
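                 <div class="mt-3 text-sm text-gray-600">
                     <p>The pseudo-labelling step can be pictured as follows: each pixel feature votes among its K nearest prototypes, drawn from an object library and an environment library, and the majority decides the pseudo-mask value. The sketch below skips the clustering-then-retrieval library construction and the multi-view retrieval that the paper adds on top.</p>
                     <pre class="mt-2 p-3 bg-gray-50 rounded-md text-xs text-gray-700 overflow-x-auto"><code># KNN retrieval against prototype libraries to produce a pseudo-mask (illustrative sketch).
import numpy as np

def knn_pseudo_mask(pixel_feats, obj_protos, env_protos, k=5):
    """pixel_feats: (N, D); obj_protos/env_protos: (M, D). Returns (N,) binary pseudo-labels."""
    protos = np.concatenate([obj_protos, env_protos])        # the retrieval library
    is_obj = np.concatenate([np.ones(len(obj_protos)), np.zeros(len(env_protos))])
    dists = ((pixel_feats[:, None, :] - protos[None, :, :]) ** 2).sum(-1)
    nn = np.argsort(dists, axis=1)[:, :k]                    # K nearest prototypes per pixel
    return (is_obj[nn].mean(axis=1) > 0.5).astype(int)       # majority vote: object vs environment

rng = np.random.default_rng(0)
mask = knn_pseudo_mask(rng.normal(size=(100, 16)),
                       rng.normal(loc=1.0, size=(20, 16)),   # fake "camouflaged object" prototypes
                       rng.normal(loc=-1.0, size=(20, 16)))  # fake "environment" prototypes
print(mask.shape, int(mask.sum()))                           # 100 pixel labels; roughly half marked object</code></pre>
                 </div>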
            </details>
    </div>
</div>
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.18381v1" target="_blank" rel="noopener noreferrer">
                S2AP: Score-space Sharpness Minimization for Adversarial Pruning
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>2/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            S2AP: Score-space Sharpness Minimization for Adversarial Pruning
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Giorgio Piras, Qi Zhao, Fabio Brau, Maura Pintor, Christian Wressnegger, Battist...
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文关注模型剪枝和对抗鲁棒性优化，属于通用模型压缩技术范畴。虽然模型效率对推荐/搜索系统有间接价值，但该工作缺乏与推荐系统、搜索或广告领域的直接关联，也未涉及Transformer架构改进或LLM技术应用，因此相关性较低。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-10-21 07:55:31
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2510.18381v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2510.18381v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CV</span><span class="category-tag">cs.AI</span><span class="category-tag">cs.LG</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     View full abstract <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Adversarial pruning methods have emerged as a powerful tool for compressing neural networks while preserving robustness against adversarial attacks. These methods typically follow a three-step pipeline: (i) pretrain a robust model, (ii) select a binary mask for weight pruning, and (iii) finetune the pruned model. To select the binary mask, these methods minimize a robust loss by assigning an importance score to each weight, and then keep the weights with the highest scores. However, this score-space optimization can lead to sharp local minima in the robust loss landscape and, in turn, to an unstable mask selection, reducing the robustness of adversarial pruning methods. To overcome this issue, we propose a novel plug-in method for adversarial pruning, termed Score-space Sharpness-aware Adversarial Pruning (S2AP). Through our method, we introduce the concept of score-space sharpness minimization, which operates during the mask search by perturbing importance scores and minimizing the corresponding robust loss. Extensive experiments across various datasets, models, and sparsity levels demonstrate that S2AP effectively minimizes sharpness in score space, stabilizing the mask selection, and ultimately improving the robustness of adversarial pruning methods.
                </div>
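                 <div class="mt-3 text-sm text-gray-600">
                     <p>Score-space sharpness minimization follows the familiar sharpness-aware two-step update, applied to pruning importance scores instead of weights: perturb the scores along the robust-loss gradient, then descend using the gradient evaluated at the perturbed scores. The quadratic stand-in loss below exists only so the sketch runs; it is not the adversarial robust loss used in the paper.</p>
                     <pre class="mt-2 p-3 bg-gray-50 rounded-md text-xs text-gray-700 overflow-x-auto"><code># Sharpness-aware update on importance scores (illustrative sketch).
import numpy as np

def s2ap_step(scores, grad_robust_loss, rho=0.05, lr=0.1):
    g = grad_robust_loss(scores)
    eps = rho * g / (np.linalg.norm(g) + 1e-12)        # worst-case perturbation in score space
    g_sharp = grad_robust_loss(scores + eps)           # gradient at the perturbed scores
    return scores - lr * g_sharp                       # descend on the sharpness-aware gradient

# Toy stand-in: pretend the robust loss is quadratic around some target scores.
target = np.linspace(0.0, 1.0, 10)
grad = lambda s: 2.0 * (s - target)

scores = np.zeros(10)
for _ in range(100):
    scores = s2ap_step(scores, grad)
mask = scores >= np.sort(scores)[len(scores) // 2]     # keep the weights with the top-half scores
print(np.round(scores, 2), mask.astype(int))</code></pre>
                 </div>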
            </details>
    </div>
</div>
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.18357v1" target="_blank" rel="noopener noreferrer">
                Learning Human-Object Interaction as Groups
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>2/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            Learning Human-Object Interaction as Groups
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Jiajun Hong, Jianan Wei, Wenguan Wang
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文关注计算机视觉中的人-物交互识别，属于纯粹的视觉理解任务。虽然交互建模在概念上与推荐系统中的用户-物品交互有相似之处，但该工作缺乏明确的机制将其技术迁移到推荐、搜索或广告领域，且不涉及LLM或Transformer架构的创新。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-10-21 07:25:10
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2510.18357v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2510.18357v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     View full abstract <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Human-Object Interaction Detection (HOI-DET) aims to localize human-object pairs and identify their interactive relationships. To aggregate contextual cues, existing methods typically propagate information across all detected entities via self-attention mechanisms, or establish message passing between humans and objects with bipartite graphs. However, they primarily focus on pairwise relationships, overlooking that interactions in real-world scenarios often emerge from collective behaviors (multiple humans and objects engaging in joint activities). In light of this, we revisit relation modeling from a group view and propose GroupHOI, a framework that propagates contextual information in terms of geometric proximity and semantic similarity. To exploit the geometric proximity, humans and objects are grouped into distinct clusters using a learnable proximity estimator based on spatial features derived from bounding boxes. In each group, a soft correspondence is computed via self-attention to aggregate and dispatch contextual cues. To incorporate the semantic similarity, we enhance the vanilla transformer-based interaction decoder with local contextual cues from HO-pair features. Extensive experiments on HICO-DET and V-COCO benchmarks demonstrate the superiority of GroupHOI over the state-of-the-art methods. It also exhibits leading performance on the more challenging Nonverbal Interaction Detection (NVI-DET) task, which involves varied forms of higher-order interactions within groups.
                </div>
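                 <div class="mt-3 text-sm text-gray-600">
                     <p>The grouping step can be illustrated with plain spatial proximity: detections whose box centers are close end up in one group before any interaction reasoning. The threshold-based connected components below stand in for the learnable proximity estimator described in the abstract.</p>
                     <pre class="mt-2 p-3 bg-gray-50 rounded-md text-xs text-gray-700 overflow-x-auto"><code># Grouping human/object detections by box-center proximity (illustrative sketch).
import numpy as np

def group_by_proximity(boxes, thresh=100.0):
    """boxes: (N, 4) as (x1, y1, x2, y2). Returns a group id per box via connected components."""
    centers = np.stack([(boxes[:, 0] + boxes[:, 2]) / 2, (boxes[:, 1] + boxes[:, 3]) / 2], axis=1)
    dists = np.linalg.norm(centers[:, None] - centers[None, :], axis=-1)
    close = np.less_equal(dists, thresh)               # pairs considered part of the same activity
    group = np.arange(len(boxes))
    for i, j in zip(*np.where(close)):                 # union-by-relabel over close pairs
        gi, gj = group[i], group[j]
        group[group == gj] = gi
    return group

boxes = np.array([[0, 0, 50, 50], [60, 10, 120, 80], [500, 500, 560, 580], [520, 490, 600, 590]])
print(group_by_proximity(boxes))                       # [0 0 2 2]: two spatial groups</code></pre>
                 </div>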
            </details>
    </div>
</div>
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.18353v1" target="_blank" rel="noopener noreferrer">
                Ranking-based Preference Optimization for Diffusion Models from Implicit User Feedback
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>2/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            Ranking-based Preference Optimization for Diffusion Models from Implicit User Feedback
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Yi-Lun Wu, Bo-Kai Ruan, Chiang Tseng, Hong-Han Shuai
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要关注扩散模型的偏好优化，属于AIGC和内容生成领域，与我的核心关注点（推荐系统、搜索、广告）相关性较弱。虽然涉及排名和隐式反馈，但这些概念在扩散模型中的应用与推荐系统/搜索中的排名优化有本质区别，缺乏直接的技术迁移潜力。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-10-21 07:22:34
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2510.18353v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2510.18353v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Direct preference optimization (DPO) methods have shown strong potential in aligning text-to-image diffusion models with human preferences by training on paired comparisons. These methods improve training stability by avoiding the REINFORCE algorithm but still struggle with challenges such as accurately estimating image probabilities due to the non-linear nature of the sigmoid function and the limited diversity of offline datasets. In this paper, we introduce Diffusion Denoising Ranking Optimization (Diffusion-DRO), a new preference learning framework grounded in inverse reinforcement learning. Diffusion-DRO removes the dependency on a reward model by casting preference learning as a ranking problem, thereby simplifying the training objective into a denoising formulation and overcoming the non-linear estimation issues found in prior methods. Moreover, Diffusion-DRO uniquely integrates offline expert demonstrations with online policy-generated negative samples, enabling it to effectively capture human preferences while addressing the limitations of offline data. Comprehensive experiments show that Diffusion-DRO delivers improved generation quality across a range of challenging and unseen prompts, outperforming state-of-the-art baselines in both quantitative metrics and user studies. Our source code and pre-trained models are available at https://github.com/basiclab/DiffusionDRO.
                </div>
            </details>
    </div>
</div>
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.18346v1" target="_blank" rel="noopener noreferrer">
                AV-Master: Dual-Path Comprehensive Perception Makes Better Audio-Visual Question Answering
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>2/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            AV-Master: Dual-Path Comprehensive Perception Makes Better Audio-Visual Question Answering
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Jiayu Zhang, Qilang Ye, Shuo Ye, Xun Lin, Zihan Song, Zitong Yu
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于视听问答任务，属于多模态学习领域，但主要关注音频和视觉模态的融合。虽然标题提到'双路径'架构，但其核心应用场景（视听问答）与推荐系统、搜索或广告的关联性较弱，没有明确的潜在应用路径。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-10-21 06:58:34
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2510.18346v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2510.18346v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Audio-Visual Question Answering (AVQA) requires models to effectively utilize both visual and auditory modalities to answer complex and diverse questions about audio-visual scenes. However, existing methods lack sufficient flexibility and dynamic adaptability in temporal sampling and modality preference awareness, making it difficult to focus on key information based on the question. This limits their reasoning capability in complex scenarios. To address these challenges, we propose a novel framework named AV-Master. It enhances the model's ability to extract key information from complex audio-visual scenes with substantial redundant content by dynamically modeling both temporal and modality dimensions. In the temporal dimension, we introduce a dynamic adaptive focus sampling mechanism that progressively focuses on audio-visual segments most relevant to the question, effectively mitigating redundancy and segment fragmentation in traditional sampling methods. In the modality dimension, we propose a preference-aware strategy that models each modality's contribution independently, enabling selective activation of critical features. Furthermore, we introduce a dual-path contrastive loss to reinforce consistency and complementarity across temporal and modality dimensions, guiding the model to learn question-specific cross-modal collaborative representations. Experiments on four large-scale benchmarks show that AV-Master significantly outperforms existing methods, especially in complex reasoning tasks.
                </div>
            </details>
    </div>
</div>
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.18345v1" target="_blank" rel="noopener noreferrer">
                GPTFace: Generative Pre-training of Facial-Linguistic Transformer by Span Masking and Weakly Correlated Text-image Data
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>2/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            GPTFace: Generative Pre-training of Facial-Linguistic Transformer by Span Masking and Weakly Correlated Text-image Data
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Yudong Li, Hao Li, Xianxu Hou, Linlin Shen
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要关注面部-语言多模态建模，属于视觉-语言交叉领域，但专注于面部这一特定视觉模态。虽然标题提到Transformer架构和预训练技术，但其应用场景（面部-语言）与推荐系统、搜索或广告的核心需求（用户行为建模、内容理解、个性化推荐）关联度较低。该技术可能对用户画像分析有间接启发，但直接相关性有限。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-10-21 06:55:44
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2510.18345v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2510.18345v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Compared to the prosperity of pre-training models in natural image understanding, the research on large-scale pre-training models for facial knowledge learning is still limited. Current approaches mainly rely on manually assembled and annotated face datasets for training, but labeling such datasets is labor-intensive and the trained models have limited scalability beyond the training data. To address these limitations, we present a generative pre-training model for facial knowledge learning that leverages large-scale web-built data for training. We use texts and images containing human faces crawled from the internet and conduct pre-training on self-supervised tasks, including masked image/language modeling (MILM) and image-text matching (ITM). During the generation stage, we further utilize the image-text matching loss to pull the generation distribution towards the control signal for controllable image/text generation. Experimental results demonstrate that our model achieves comparable performance to state-of-the-art pre-training models for various facial downstream tasks, such as attribution classification and expression recognition. Furthermore, our approach is also applicable to a wide range of face editing tasks, including face attribute editing, expression manipulation, mask removal, and photo inpainting.
                </div>
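                 <p class="mt-3 text-sm text-gray-700">As a rough illustration of the span-masking objective mentioned above, the Python sketch below masks contiguous token spans and records the positions to be predicted; the mask ratio and span length are assumptions, not the paper's settings.</p>
                 <pre class="mt-2 p-3 bg-gray-100 rounded-md text-xs text-gray-800 overflow-x-auto"><code># Minimal sketch of span masking for masked image/language modeling (MILM):
# contiguous spans are replaced by a mask id and only those positions become
# prediction targets. Hyperparameters below are illustrative assumptions.
import random

def span_mask(tokens, mask_id=0, mask_ratio=0.15, max_span=5, seed=42):
    rng = random.Random(seed)
    tokens = list(tokens)
    n = len(tokens)
    targets = set()
    while int(mask_ratio * n) > len(targets):
        span = rng.randint(1, max_span)          # random span length
        start = rng.randrange(0, n)              # random span start
        targets.update(range(start, min(start + span, n)))
    masked = [mask_id if i in targets else t for i, t in enumerate(tokens)]
    labels = {i: tokens[i] for i in sorted(targets)}   # positions to predict
    return masked, labels

masked, labels = span_mask(range(100, 120))
print(masked)
print(labels)
</code></pre>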
            </details>
    </div>
</div>
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.18321v1" target="_blank" rel="noopener noreferrer">
                Beyond Single Models: Mitigating Multimodal Hallucinations via Adaptive Token Ensemble Decoding
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>2/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            Beyond Single Models: Mitigating Multimodal Hallucinations via Adaptive Token Ensemble Decoding
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Jinlin Li, Yuran Wang, Yifei Yuan, Xiao Zhou, Yingying Zhang, Xixian Yong, Yefen...
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要关注多模态模型中的幻觉缓解问题，这属于纯粹的NLP评估和幻觉相关主题，被明确列为无关内容。虽然提到了多模态和集成方法，但核心焦点是幻觉缓解而非推荐系统、搜索或广告的应用潜力。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-10-21 06:11:24
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2510.18321v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2510.18321v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Large Vision-Language Models (LVLMs) have recently achieved impressive results in multimodal tasks such as image captioning and visual question answering. However, they remain prone to object hallucination -- generating descriptions of nonexistent or misidentified objects. Prior work has partially mitigated this via auxiliary training objectives or external modules, but challenges remain in terms of scalability, adaptability, and model independence. To address these limitations, we propose Adaptive Token Ensemble Decoding (ATED), a training-free, token-level ensemble framework that mitigates hallucination by aggregating predictions from multiple LVLMs during inference. ATED dynamically computes uncertainty-based weights for each model, reflecting their reliability at each decoding step. It also integrates diverse decoding paths to improve contextual grounding and semantic consistency. Experiments on standard hallucination detection benchmarks demonstrate that ATED significantly outperforms state-of-the-art methods, reducing hallucination without compromising fluency or relevance. Our findings highlight the benefits of adaptive ensembling and point to a promising direction for improving LVLM robustness in high-stakes applications. The code is available at https://github.com/jinlin2021/ATED.
                </div>
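                 <p class="mt-3 text-sm text-gray-700">The sketch below illustrates the general idea of uncertainty-weighted, token-level ensembling: each model contributes a next-token distribution, and lower-entropy (more confident) models receive larger weights. The entropy-based weighting and temperature are assumptions; the paper's exact formulation may differ.</p>
                 <pre class="mt-2 p-3 bg-gray-100 rounded-md text-xs text-gray-800 overflow-x-auto"><code># Minimal sketch of uncertainty-weighted, token-level ensembling: each model
# contributes a next-token distribution; lower-entropy (more confident) models
# get larger weights; the weighted mixture drives decoding. The exponential
# weighting and temperature are assumptions, not ATED's exact formulation.
import numpy as np

def entropy(p, eps=1e-12):
    return -np.sum(p * np.log(p + eps), axis=-1)

def ensemble_step(dists, temperature=1.0):
    """dists: (num_models, vocab) next-token distributions from each LVLM."""
    h = entropy(dists)                       # per-model predictive uncertainty
    weights = np.exp(-h / temperature)
    weights /= weights.sum()
    mixed = weights @ dists                  # (vocab,) ensembled distribution
    return mixed / mixed.sum(), weights

rng = np.random.default_rng(0)
dists = rng.dirichlet(np.ones(10), size=3)   # 3 models, toy vocabulary of 10
mixed, w = ensemble_step(dists)
print("model weights:", w.round(3), "| next token:", int(mixed.argmax()))
</code></pre>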
            </details>
    </div>
</div>
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.18303v1" target="_blank" rel="noopener noreferrer">
                Proactive Reasoning-with-Retrieval Framework for Medical Multimodal Large Language Models
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>2/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            Proactive Reasoning-with-Retrieval Framework for Medical Multimodal Large Language Models
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Lehan Wang, Yi Qin, Honglong Yang, Xiaomeng Li
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文虽然涉及多模态LLM和检索增强生成技术，但其核心应用领域是医学，这属于明确的无关主题。论文中提到的推理-检索框架可能对搜索系统有潜在启发，但医学领域的特定应用使其与当前关注点相关性很低。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-10-21 05:18:18
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2510.18303v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2510.18303v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Incentivizing the reasoning ability of Multimodal Large Language Models (MLLMs) is essential for medical applications to transparently analyze medical scans and provide reliable diagnosis. However, existing medical MLLMs rely solely on internal knowledge during reasoning, leading to hallucinated reasoning and factual inaccuracies when encountering cases beyond their training scope. Although recent Agentic Retrieval-Augmented Generation (RAG) methods elicit the medical model's proactive retrieval ability during reasoning, they are confined to unimodal LLMs, neglecting the crucial visual information during reasoning and retrieval. Consequently, we propose the first Multimodal Medical Reasoning-with-Retrieval framework, Med-RwR, which actively retrieves external knowledge by querying observed symptoms or domain-specific medical concepts during reasoning. Specifically, we design a two-stage reinforcement learning strategy with tailored rewards that stimulate the model to leverage both visual diagnostic findings and textual clinical information for effective retrieval. Building on this foundation, we further propose a Confidence-Driven Image Re-retrieval (CDIR) method for test-time scaling when low prediction confidence is detected. Evaluation on various public medical benchmarks demonstrates Med-RwR's significant improvements over baseline models, proving the effectiveness of enhancing reasoning capabilities with external knowledge integration. Furthermore, Med-RwR demonstrates remarkable generalizability to unfamiliar domains, evidenced by 8.8% performance gain on our proposed EchoCardiography Benchmark (ECBench), despite the scarcity of echocardiography data in the training corpus. Our data, model, and codes will be made publicly available at https://github.com/xmed-lab/Med-RwR.
                </div>
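                 <p class="mt-3 text-sm text-gray-700">The confidence-driven re-retrieval idea can be sketched as a simple control loop: answer once, and if confidence falls below a threshold, retrieve again with a query enriched by the first prediction. The <code>retrieve</code> and <code>answer</code> stubs below are hypothetical placeholders, not Med-RwR's interfaces.</p>
                 <pre class="mt-2 p-3 bg-gray-100 rounded-md text-xs text-gray-800 overflow-x-auto"><code># Minimal sketch of confidence-driven re-retrieval: answer once, and if the
# prediction confidence falls below a threshold, retrieve again with a query
# enriched by the first guess and re-answer. `retrieve` and `answer` are
# hypothetical stubs, not Med-RwR's actual interfaces.
from typing import List, Tuple

def retrieve(query: str) -> List[str]:
    return [f"knowledge passage about: {query}"]          # placeholder retriever

def answer(question: str, passages: List[str]) -> Tuple[str, float]:
    # placeholder model call returning (prediction, confidence)
    return ("dilated cardiomyopathy", 0.42 if 2 > len(passages) else 0.81)

def answer_with_re_retrieval(question: str, threshold: float = 0.6) -> str:
    passages = retrieve(question)
    pred, conf = answer(question, passages)
    if threshold > conf:                                   # low confidence: retrieve again
        passages += retrieve(f"{question} preliminary finding: {pred}")
        pred, conf = answer(question, passages)
    return pred

print(answer_with_re_retrieval("What does this echocardiogram suggest?"))
</code></pre>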
            </details>
    </div>
</div>
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.18291v1" target="_blank" rel="noopener noreferrer">
                GeoDiff: Geometry-Guided Diffusion for Metric Depth Estimation
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>2/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            GeoDiff: Geometry-Guided Diffusion for Metric Depth Estimation
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Tuan Pham, Thanh-Tung Le, Xiaohui Xie, Stephan Mandt
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于计算机视觉中的深度估计任务，属于纯粹的视觉领域研究。虽然扩散模型是生成模型的重要进展，但该工作没有展示与推荐系统、搜索或广告的明确联系。深度估计技术可能在某些特定场景（如AR/VR购物）中有间接应用，但这种关联性较弱且不直接。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-10-21 04:47:36
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2510.18291v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2510.18291v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    We introduce a novel framework for metric depth estimation that enhances pretrained diffusion-based monocular depth estimation (DB-MDE) models with stereo vision guidance. While existing DB-MDE methods excel at predicting relative depth, estimating absolute metric depth remains challenging due to scale ambiguities in single-image scenarios. To address this, we reframe depth estimation as an inverse problem, leveraging pretrained latent diffusion models (LDMs) conditioned on RGB images, combined with stereo-based geometric constraints, to learn scale and shift for accurate depth recovery. Our training-free solution seamlessly integrates into existing DB-MDE frameworks and generalizes across indoor, outdoor, and complex environments. Extensive experiments demonstrate that our approach matches or surpasses state-of-the-art methods, particularly in challenging scenarios involving translucent and specular surfaces, all without requiring retraining.
                </div>
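                 <p class="mt-3 text-sm text-gray-700">To make the scale-and-shift recovery concrete, the sketch below fits a global scale s and shift t by least squares so that s·d_rel + t matches sparse metric anchors (e.g., from stereo). This shows only the alignment step under simplifying assumptions, not the diffusion guidance itself.</p>
                 <pre class="mt-2 p-3 bg-gray-100 rounded-md text-xs text-gray-800 overflow-x-auto"><code># Minimal sketch of the scale/shift alignment step: fit a global scale s and
# shift t by least squares so that s * rel_depth + t matches sparse metric
# anchors (e.g., obtained from stereo). The diffusion guidance itself is not
# shown; the synthetic data below is purely illustrative.
import numpy as np

def fit_scale_shift(rel_depth, metric_depth, mask):
    """Solve min over (s, t) of || s * rel + t - metric ||^2 at valid pixels."""
    r = rel_depth[mask]
    m = metric_depth[mask]
    A = np.stack([r, np.ones_like(r)], axis=1)
    (s, t), *_ = np.linalg.lstsq(A, m, rcond=None)
    return s, t

rng = np.random.default_rng(0)
rel = rng.uniform(0.1, 1.0, size=(4, 4))      # relative depth from the MDE model
metric = 3.0 * rel + 0.5                      # synthetic "true" metric depth
mask = np.zeros_like(rel, dtype=bool)
mask[::2, ::2] = True                         # sparse stereo anchors
s, t = fit_scale_shift(rel, metric, mask)
print(f"scale={s:.2f} shift={t:.2f}")         # recovers ~3.00 and ~0.50
</code></pre>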
            </details>
    </div>
</div>
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.18269v1" target="_blank" rel="noopener noreferrer">
                StreamingTOM: Streaming Token Compression for Efficient Video Understanding
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>2/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            StreamingTOM: Streaming Token Compression for Efficient Video Understanding
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Xueyi Chen, Keda Tao, Kele Shao, Huan Wang
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于视频理解和令牌压缩技术，属于计算机视觉领域，与推荐系统、搜索或广告的核心关注点没有直接关联。虽然令牌压缩技术可能对处理长序列有一定启发，但论文明确针对视频模态，在推荐/搜索/广告领域的直接应用潜力有限。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-10-21 03:39:41
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2510.18269v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2510.18269v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CV</span><span class="category-tag">cs.AI</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Unlike offline processing, streaming video vision-language models face two fundamental constraints: causality and accumulation. Causality prevents access to future frames that offline methods exploit, while accumulation causes tokens to grow unbounded, creating efficiency bottlenecks. However, existing approaches only regulate post-LLM kv-cache, leaving costly pre-LLM prefill unchanged. We introduce StreamingTOM, a training-free, plug-and-play two-stage framework that addresses both pre-LLM and post-LLM bottlenecks with predictable latency. Causal Temporal Reduction imposes a fixed per-frame budget and selects tokens based on adjacent-frame changes and token saliency, drastically reducing per-frame prefill cost by processing only a compact subset of visual tokens per frame instead of all visual tokens. Online Quantized Memory stores tokens in 4-bit format, retrieves relevant groups on demand, and dequantizes them, keeping the active kv-cache bounded regardless of stream length. Experiments demonstrate our method achieves 15.7× kv-cache compression, 1.2× lower peak memory and 2× faster TTFT compared to prior SOTA. StreamingTOM maintains state-of-the-art accuracy among training-free methods with an average of 63.8% on offline benchmarks and 55.8%/3.7 on RVS. These results highlight the practical benefits of our two-stage approach for efficient streaming video understanding with bounded growth.
                </div>
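                 <p class="mt-3 text-sm text-gray-700">The two mechanisms described above can be sketched in a few lines: keep a fixed per-frame budget of tokens ranked by how much they changed since the previous frame, and store the kept tokens in a 4-bit quantized memory. The budget, ranking signal, and quantization scheme below are illustrative assumptions.</p>
                 <pre class="mt-2 p-3 bg-gray-100 rounded-md text-xs text-gray-800 overflow-x-auto"><code># Minimal sketch of the two mechanisms under simplifying assumptions:
# (1) keep a fixed per-frame budget of visual tokens ranked by how much each
#     token changed relative to the previous frame;
# (2) store kept tokens in a 4-bit (16-level) quantized memory and dequantize
#     on access. Budget and quantization scheme are illustrative only.
import numpy as np

def select_tokens(prev_tokens, cur_tokens, budget):
    """Rank tokens by L2 change from the previous frame and keep the top-k."""
    change = np.linalg.norm(cur_tokens - prev_tokens, axis=1)
    return np.sort(np.argsort(-change)[:budget])   # keep original token order

def quantize_4bit(x):
    lo, hi = float(x.min()), float(x.max())
    q = np.round((x - lo) / (hi - lo + 1e-8) * 15).astype(np.uint8)
    return q, lo, hi

def dequantize_4bit(q, lo, hi):
    return q.astype(np.float32) / 15 * (hi - lo) + lo

rng = np.random.default_rng(0)
prev, cur = rng.normal(size=(196, 64)), rng.normal(size=(196, 64))
keep = select_tokens(prev, cur, budget=32)           # 196 tokens kept down to 32
q, lo, hi = quantize_4bit(cur[keep])
err = np.abs(dequantize_4bit(q, lo, hi) - cur[keep]).max()
print(keep.shape, f"max dequantization error: {err:.3f}")
</code></pre>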
            </details>
    </div>
</div>
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.18253v1" target="_blank" rel="noopener noreferrer">
                OpenInsGaussian: Open-vocabulary Instance Gaussian Segmentation with Context-aware Cross-view Fusion
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>2/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            OpenInsGaussian: Open-vocabulary Instance Gaussian Segmentation with Context-aware Cross-view Fusion
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Tianyu Huang, Runnan Chen, Dongting Hu, Fengming Huang, Mingming Gong, Tongliang...
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要关注计算机视觉领域的开放词汇实例分割和3D高斯表示，属于纯粹的视觉技术范畴。虽然标题提到'上下文感知'和'跨视图融合'，但这些技术主要针对视觉场景理解，没有明确指向推荐系统、搜索或广告领域的应用场景，与当前关注的LLM技术、Transformer架构或异构数据建模缺乏直接关联。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-10-21 03:24:12
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2510.18253v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2510.18253v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Understanding 3D scenes is pivotal for autonomous driving, robotics, and augmented reality. Recent semantic Gaussian Splatting approaches leverage large-scale 2D vision models to project 2D semantic features onto 3D scenes. However, they suffer from two major limitations: (1) insufficient contextual cues for individual masks during preprocessing and (2) inconsistencies and missing details when fusing multi-view features from these 2D models. In this paper, we introduce OpenInsGaussian, an Open-vocabulary Instance Gaussian segmentation framework with Context-aware Cross-view Fusion. Our method consists of two modules: Context-Aware Feature Extraction, which augments each mask with rich semantic context, and Attention-Driven Feature Aggregation, which selectively fuses multi-view features to mitigate alignment errors and incompleteness. Through extensive experiments on benchmark datasets, OpenInsGaussian achieves state-of-the-art results in open-vocabulary 3D Gaussian segmentation, outperforming existing baselines by a large margin. These findings underscore the robustness and generality of our proposed approach, marking a significant step forward in 3D scene understanding and its practical deployment across diverse real-world scenarios.
                </div>
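                 <p class="mt-3 text-sm text-gray-700">As an illustration of attention-driven multi-view fusion, the sketch below scores each view's feature against a reference (here, the mean view feature) and fuses them with softmax weights. The reference choice and temperature are assumptions; the paper's aggregation module is more elaborate.</p>
                 <pre class="mt-2 p-3 bg-gray-100 rounded-md text-xs text-gray-800 overflow-x-auto"><code># Minimal sketch of attention-driven multi-view fusion: each view's feature of
# the same instance is scored against a reference (here, the mean view
# feature) and fused with softmax weights. The reference choice and the
# temperature are assumptions; the paper's aggregation module is richer.
import numpy as np

def fuse_views(view_feats, temperature=0.1):
    """view_feats: (V, D) features of one instance observed from V views."""
    ref = view_feats.mean(axis=0)
    f = view_feats / np.linalg.norm(view_feats, axis=1, keepdims=True)
    r = ref / np.linalg.norm(ref)
    scores = f @ r / temperature              # cosine-similarity attention scores
    w = np.exp(scores - scores.max())
    w /= w.sum()
    return w @ view_feats, w                  # fused feature and per-view weights

views = np.random.default_rng(1).normal(size=(5, 16))
fused, w = fuse_views(views)
print("view weights:", w.round(3), "| fused shape:", fused.shape)
</code></pre>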
            </details>
    </div>
</div>
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.18244v1" target="_blank" rel="noopener noreferrer">
                BlendCLIP: Bridging Synthetic and Real Domains for Zero-Shot 3D Object Classification with Multimodal Pretraining
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>2/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            BlendCLIP: Bridging Synthetic and Real Domains for Zero-Shot 3D Object Classification with Multimodal Pretraining
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Ajinkya Khoche, Gergő László Nagy, Maciej Wozniak, Thomas Gustafsson, Patric Jen...
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要关注3D物体分类和跨域适应，属于计算机视觉领域，与推荐系统、搜索或广告的核心技术关联性较弱。虽然涉及多模态预训练技术，但其应用场景（3D视觉）和问题定义（零样本分类）在RecSys/Search/Ads中的直接应用潜力有限。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-10-21 03:08:27
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2510.18244v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2510.18244v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Zero-shot 3D object classification is crucial for real-world applications like autonomous driving, however it is often hindered by a significant domain gap between the synthetic data used for training and the sparse, noisy LiDAR scans encountered in the real-world. Current methods trained solely on synthetic data fail to generalize to outdoor scenes, while those trained only on real data lack the semantic diversity to recognize rare or unseen objects. We introduce BlendCLIP, a multimodal pretraining framework that bridges this synthetic-to-real gap by strategically combining the strengths of both domains. We first propose a pipeline to generate a large-scale dataset of object-level triplets -- consisting of a point cloud, image, and text description -- mined directly from real-world driving data and human annotated 3D boxes. Our core contribution is a curriculum-based data mixing strategy that first grounds the model in the semantically rich synthetic CAD data before progressively adapting it to the specific characteristics of real-world scans. Our experiments show that our approach is highly label-efficient: introducing as few as 1.5% real-world samples per batch into training boosts zero-shot accuracy on the nuScenes benchmark by 27%. Consequently, our final model achieves state-of-the-art performance on challenging outdoor datasets like nuScenes and TruckScenes, improving over the best prior method by 19.3% on nuScenes, while maintaining strong generalization on diverse synthetic benchmarks. Our findings demonstrate that effective domain adaptation, not full-scale real-world annotation, is the key to unlocking robust open-vocabulary 3D perception. Our code and dataset will be released upon acceptance on https://github.com/kesu1/BlendCLIP.
                </div>
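                 <p class="mt-3 text-sm text-gray-700">The curriculum-based mixing strategy can be sketched as a batch sampler whose real-data fraction ramps from zero (synthetic-only warmup) to a small target rate such as the 1.5% per batch reported above. The linear ramp and step counts are illustrative assumptions.</p>
                 <pre class="mt-2 p-3 bg-gray-100 rounded-md text-xs text-gray-800 overflow-x-auto"><code># Minimal sketch of curriculum-based data mixing: batches start fully
# synthetic, then the real-data fraction ramps linearly to a small target rate
# (e.g., the 1.5% per batch mentioned above). Warmup length and ramp schedule
# are illustrative assumptions, not the paper's recipe.
import random

def real_fraction(step, warmup_steps=1000, target=0.015):
    """Synthetic-only warmup, then a linear ramp up to `target`."""
    if warmup_steps > step:
        return 0.0
    return min(target, target * (step - warmup_steps) / warmup_steps)

def sample_batch(synthetic_pool, real_pool, batch_size, step, rng):
    n_real = round(real_fraction(step) * batch_size)
    batch = rng.sample(real_pool, n_real) + rng.sample(synthetic_pool, batch_size - n_real)
    rng.shuffle(batch)
    return batch

rng = random.Random(0)
synthetic = [("cad", i) for i in range(10000)]
real = [("lidar", i) for i in range(500)]
for step in (0, 1500, 3000):
    batch = sample_batch(synthetic, real, 256, step, rng)
    n_real = sum(1 for src, _ in batch if src == "lidar")
    print(f"step {step}: {n_real} real samples out of {len(batch)}")
</code></pre>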
            </details>
    </div>
</div>
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.18234v1" target="_blank" rel="noopener noreferrer">
                DeepSeek-OCR: Contexts Optical Compression
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>2/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            DeepSeek-OCR: Contexts Optical Compression
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Haoran Wei, Yaofeng Sun, Yukun Li
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文标题表明其专注于光学字符识别（OCR）和图像压缩技术，这属于计算机视觉领域。虽然OCR在文档处理中有应用，但该论文没有显示出与推荐系统、搜索或广告排名的直接相关性，也没有涉及LLM技术或Transformer架构的进展。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-10-21 02:41:44
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2510.18234v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2510.18234v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    We present DeepSeek-OCR as an initial investigation into the feasibility of compressing long contexts via optical 2D mapping. DeepSeek-OCR consists of two components: DeepEncoder and DeepSeek3B-MoE-A570M as the decoder. Specifically, DeepEncoder serves as the core engine, designed to maintain low activations under high-resolution input while achieving high compression ratios to ensure an optimal and manageable number of vision tokens. Experiments show that when the number of text tokens is within 10 times that of vision tokens (i.e., a compression ratio < 10x), the model can achieve decoding (OCR) precision of 97%. Even at a compression ratio of 20x, the OCR accuracy still remains at about 60%. This shows considerable promise for research areas such as historical long-context compression and memory forgetting mechanisms in LLMs. Beyond this, DeepSeek-OCR also demonstrates high practical value. On OmniDocBench, it surpasses GOT-OCR2.0 (256 tokens/page) using only 100 vision tokens, and outperforms MinerU2.0 (6000+ tokens per page on average) while utilizing fewer than 800 vision tokens. In production, DeepSeek-OCR can generate training data for LLMs/VLMs at a scale of 200k+ pages per day (a single A100-40G). Codes and model weights are publicly accessible at http://github.com/deepseek-ai/DeepSeek-OCR.
                </div>
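                 <p class="mt-3 text-sm text-gray-700">A small worked example of the compression ratio discussed above: the ratio is text tokens divided by vision tokens, so 1000 text tokens rendered into 100 vision tokens is the 10x regime (~97% OCR precision reported), while 2000 text tokens with the same 100 vision tokens is the 20x regime (~60%). The token counts are illustrative, not measurements from the paper.</p>
                 <pre class="mt-2 p-3 bg-gray-100 rounded-md text-xs text-gray-800 overflow-x-auto"><code># Worked example of the ratio from the abstract: compression ratio =
# text tokens needed for the page / vision tokens fed to the decoder.
# Token counts below are illustrative assumptions.
def compression_ratio(text_tokens: int, vision_tokens: int) -> float:
    return text_tokens / vision_tokens

# ~1000 text tokens encoded into 100 vision tokens is the 10x regime
# (~97% OCR precision reported); 2000 text tokens with the same 100
# vision tokens is the 20x regime (~60% accuracy reported).
print(compression_ratio(1000, 100), compression_ratio(2000, 100))   # 10.0 20.0
</code></pre>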
            </details>
    </div>
</div>
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.18229v1" target="_blank" rel="noopener noreferrer">
                Beyond Frequency: Scoring-Driven Debiasing for Object Detection via Blueprint-Prompted Image Synthesis
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>2/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            Beyond Frequency: Scoring-Driven Debiasing for Object Detection via Blueprint-Prompted Image Synthesis
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Xinhao Cai, Liulei Li, Gensheng Pei, Tao Chen, Jinshan Pan, Yazhou Yao, Wenguan ...
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要关注计算机视觉领域的目标检测去偏问题，使用图像合成技术解决频率偏差。虽然去偏概念在推荐系统中也存在，但该论文的方法论和核心技术创新（蓝图提示图像合成）主要针对视觉检测任务，与推荐/搜索/广告系统的核心架构、Transformer技术或LLM应用缺乏直接关联。图像合成技术可能间接启发多模态推荐，但连接过于薄弱。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-10-21 02:19:12
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2510.18229v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2510.18229v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    This paper presents a generation-based debiasing framework for object detection. Prior debiasing methods are often limited by the representation diversity of samples, while naive generative augmentation often preserves the biases it aims to solve. Moreover, our analysis reveals that simply generating more data for rare classes is suboptimal due to two core issues: i) instance frequency is an incomplete proxy for the true data needs of a model, and ii) current layout-to-image synthesis lacks the fidelity and control to generate high-quality, complex scenes. To overcome this, we introduce the representation score (RS) to diagnose representational gaps beyond mere frequency, guiding the creation of new, unbiased layouts. To ensure high-quality synthesis, we replace ambiguous text prompts with a precise visual blueprint and employ a generative alignment strategy, which fosters communication between the detector and generator. Our method significantly narrows the performance gap for underrepresented object groups, e.g., improving large/rare instances by 4.4/3.6 mAP over the baseline, and surpassing prior L2I synthesis models by 15.9 mAP for layout accuracy in generated images.
                </div>
            </details>
    </div>
</div>
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.18187v1" target="_blank" rel="noopener noreferrer">
                VelocityNet: Real-Time Crowd Anomaly Detection via Person-Specific Velocity Analysis
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>2/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            VelocityNet: Real-Time Crowd Anomaly Detection via Person-Specific Velocity Analysis
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Fatima AlGhamdi, Omar Alharbi, Abdullah Aldwyish, Raied Aljadaany, Muhammad Kamr...
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于计算机视觉领域的人群异常检测，虽然涉及实时分析技术，但与推荐系统、搜索或广告的核心技术领域没有直接关联。其基于速度分析的方法在用户行为建模或序列分析方面缺乏明确的跨模态应用潜力，无法直接应用于推荐、搜索或广告场景。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-10-21 00:26:54
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2510.18187v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2510.18187v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CV</span><span class="category-tag">cs.AI</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Detecting anomalies in crowded scenes is challenging due to severe inter-person occlusions and highly dynamic, context-dependent motion patterns. Existing approaches often struggle to adapt to varying crowd densities and lack interpretable anomaly indicators. To address these limitations, we introduce VelocityNet, a dual-pipeline framework that combines head detection and dense optical flow to extract person-specific velocities. Hierarchical clustering categorizes these velocities into semantic motion classes (halt, slow, normal, and fast), and a percentile-based anomaly scoring system measures deviations from learned normal patterns. Experiments demonstrate the effectiveness of our framework in real-time detection of diverse anomalous motion patterns within densely crowded environments.
                </div>
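                 <p class="mt-3 text-sm text-gray-700">The percentile-based scoring can be sketched directly: per-person speeds from normal footage define an empirical distribution, and a new speed is scored by how far its percentile sits from the median. The two-sided scoring rule and the walking-speed numbers below are assumptions, not the paper's calibration.</p>
                 <pre class="mt-2 p-3 bg-gray-100 rounded-md text-xs text-gray-800 overflow-x-auto"><code># Minimal sketch of percentile-based anomaly scoring: per-person speeds from
# "normal" footage define an empirical distribution; a new speed is scored by
# how far its percentile sits from the median. The two-sided rule and the
# walking-speed numbers are assumptions, not the paper's calibration.
import numpy as np

def fit_normal_speeds(speeds):
    return np.sort(np.asarray(speeds, dtype=float))

def anomaly_score(speed, sorted_normal):
    """Score in [0, 1]: 0 = typical speed, 1 = extreme (halted or sprinting)."""
    pct = np.searchsorted(sorted_normal, speed) / len(sorted_normal)
    return 2 * abs(pct - 0.5)

rng = np.random.default_rng(0)
normal = fit_normal_speeds(rng.normal(1.4, 0.3, size=5000))   # walking speeds, m/s
for v in (0.0, 1.4, 4.0):
    print(f"speed={v:.1f} m/s -&gt; score={anomaly_score(v, normal):.2f}")
</code></pre>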
            </details>
    </div>
</div>
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.18394v1" target="_blank" rel="noopener noreferrer">
                Censorship Chokepoints: New Battlegrounds for Regional Surveillance, Censorship and Influence on the Internet
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>1/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            Censorship Chokepoints: New Battlegrounds for Regional Surveillance, Censorship and Influence on the Internet
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Yong Zhang, Nishanth Sastry
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文标题明确聚焦于互联网审查、监控和影响力操作等主题，这些完全属于用户指定的无关主题范畴（安全、隐私、公平性等非技术性话题）。该内容与推荐系统、搜索、广告或相关使能技术没有任何技术关联。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-10-21 08:14:10
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2510.18394v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2510.18394v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CR</span><span class="category-tag">cs.IR</span><span class="category-tag">cs.NI</span><span class="category-tag">cs.SI</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Undoubtedly, the Internet has become one of the most important conduits to information for the general public. Nonetheless, Internet access can be and has been limited systematically or blocked completely during political events in numerous countries and regions by various censorship mechanisms. Depending on where the core filtering component is situated, censorship techniques have been classified as client-based, server-based, or network-based. However, as the Internet evolves rapidly, new and sophisticated censorship techniques have emerged, which involve techniques that cut across locations and involve new forms of hurdles to information access. We argue that modern censorship can be better understood through a new lens that we term chokepoints, which identifies bottlenecks in the content production or delivery cycle where efficient new forms of large-scale client-side surveillance and filtering mechanisms have emerged.
                </div>
            </details>
    </div>
</div>
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.18355v1" target="_blank" rel="noopener noreferrer">
                KrishokBondhu: A Retrieval-Augmented Voice-Based Agricultural Advisory Call Center for Bengali Farmers
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>1/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            KrishokBondhu: A Retrieval-Augmented Voice-Based Agricultural Advisory Call Center for Bengali Farmers
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Mohd Ruhul Ameen, Akif Islam, Farjana Aktar, M. Saifuzzaman Rafat
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于农业领域的语音咨询系统，属于特定领域应用，与推荐系统、搜索或广告的核心技术无关。虽然提到了检索增强技术，但应用场景是农业咨询而非RecSys/Search/Ads领域，因此不符合任何关注点。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-10-21 07:24:55
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2510.18355v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2510.18355v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CL</span><span class="category-tag">cs.HC</span><span class="category-tag">cs.IR</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    In Bangladesh, many farmers continue to face challenges in accessing timely, expert-level agricultural guidance. This paper presents KrishokBondhu, a voice-enabled, call-centre-integrated advisory platform built on a Retrieval-Augmented Generation (RAG) framework, designed specifically for Bengali-speaking farmers. The system aggregates authoritative agricultural handbooks, extension manuals, and NGO publications; applies Optical Character Recognition (OCR) and document-parsing pipelines to digitize and structure the content; and indexes this corpus in a vector database for efficient semantic retrieval. Through a simple phone-based interface, farmers can call the system to receive real-time, context-aware advice: speech-to-text converts the Bengali query, the RAG module retrieves relevant content, a large language model (Gemma 3-4B) generates a context-grounded response, and text-to-speech delivers the answer in natural spoken Bengali. In a pilot evaluation, KrishokBondhu produced high-quality responses for 72.7% of diverse agricultural queries covering crop management, disease control, and cultivation practices. Compared to the KisanQRS benchmark, the system achieved a composite score of 4.53 (vs. 3.13) on a 5-point scale, a 44.7% improvement, with especially large gains in contextual richness (+367%) and completeness (+100.4%), while maintaining comparable relevance and technical specificity. Semantic similarity analysis further revealed a strong correlation between retrieved context and answer quality, emphasizing the importance of grounding generative responses in curated documentation. KrishokBondhu demonstrates the feasibility of integrating call-centre accessibility, multilingual voice interaction, and modern RAG techniques to deliver expert-level agricultural guidance to remote Bangladeshi farmers, paving the way toward a fully AI-driven agricultural advisory ecosystem.
                </div>
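                 <div class="mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    <p class="mb-2">下面给出一个极简的语音 RAG 咨询流程示意，仅按摘要描述的“语音转文本 → 向量检索 → LLM 生成 → 语音合成”链路拼接而成；其中的知识库、相似度计算与各函数均为说明用的假设实现，并非论文系统的真实代码。</p>
                    <pre class="overflow-x-auto"><code># Minimal sketch of a voice-based RAG advisory loop (illustrative only; the
# paper's system uses Bengali ASR/TTS, a vector database, and Gemma 3-4B).
from collections import Counter
import math

# Toy "knowledge base" standing in for digitized agricultural handbooks.
DOCUMENTS = [
    "Apply urea fertilizer to rice three weeks after transplanting.",
    "Brown spot disease in rice can be managed with balanced potassium.",
    "Harvest jute when 50 percent of the pods have matured.",
]

def embed(text: str) -> Counter:
    """Bag-of-words stand-in for a sentence-embedding model."""
    return Counter(text.lower().split())

def cosine(a: Counter, b: Counter) -> float:
    dot = sum(a[t] * b[t] for t in a)
    na = math.sqrt(sum(v * v for v in a.values()))
    nb = math.sqrt(sum(v * v for v in b.values()))
    return dot / (na * nb) if na and nb else 0.0

def retrieve(query: str, k: int = 1) -> list[str]:
    q = embed(query)
    return sorted(DOCUMENTS, key=lambda d: cosine(q, embed(d)), reverse=True)[:k]

def speech_to_text(audio) -> str:       # placeholder for a Bengali ASR model
    return audio

def generate_answer(query: str, context: list[str]) -> str:  # placeholder LLM call
    return f"Based on: {context[0]} -> advice for '{query}'"

def text_to_speech(text: str) -> None:  # placeholder for a Bengali TTS engine
    print("[spoken]", text)

def handle_call(audio_query):
    query = speech_to_text(audio_query)
    answer = generate_answer(query, retrieve(query))
    text_to_speech(answer)

handle_call("how to treat brown spot in rice")</code></pre>
                 </div>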
            </details>
    </div>
</div>
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.18779v1" target="_blank" rel="noopener noreferrer">
                KAT-Coder技术报告
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>1/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            KAT-Coder Technical Report
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Zizheng Zhan, Ken Deng, Xiaojiang Zhang, Jinghui Wang, Huaixi Tang, Zhiyi Lai, H...
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">标题过于模糊，仅表明这是一个技术报告，没有任何具体的技术领域或方法暗示。无法判断是否涉及推荐系统、搜索、广告或相关使能技术。缺乏足够信息来评估与您关注领域的潜在关联。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-10-21 16:27:47
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2510.18779v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2510.18779v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CL</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Recent advances in large language models (LLMs) have enabled progress in agentic coding, where models autonomously reason, plan, and act within interactive software development workflows. However, bridging the gap between static text-based training and dynamic real-world agentic execution remains a core challenge. In this technical report, we present KAT-Coder, a large-scale agentic code model trained through a multi-stage curriculum encompassing Mid-Term Training, Supervised Fine-Tuning (SFT), Reinforcement Fine-Tuning (RFT), and Reinforcement-to-Deployment Adaptation. The Mid-Term stage enhances reasoning, planning, and reflection capabilities through a corpus of real software engineering data and synthetic agentic interactions. The SFT stage constructs a million-sample dataset balancing twenty programming languages, ten development contexts, and ten task archetypes. The RFT stage introduces a novel multi-ground-truth reward formulation for stable and sample-efficient policy optimization. Finally, the Reinforcement-to-Deployment phase adapts the model to production-grade IDE environments using Error-Masked SFT and Tree-Structured Trajectory Training. In summary, these stages enable KAT-Coder to achieve robust tool-use reliability, instruction alignment, and long-context reasoning, forming a deployable foundation for real-world intelligent coding agents. Our KAT series 32B model, KAT-Dev, has been open-sourced on https://huggingface.co/Kwaipilot/KAT-Dev.
                </div>
            </details>
    </div>
</div>
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.18774v1" target="_blank" rel="noopener noreferrer">
                人工智能在美国报纸中的应用广泛但不均衡，且很少公开披露
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>1/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            AI use in American newspapers is widespread, uneven, and rarely disclosed
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Jenna Russell, Marzena Karpinska, Destiny Akinode, Katherine Thai, Bradley Emi, ...
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要关注AI在新闻行业的应用现状和透明度问题，属于媒体行业应用研究。这与我的核心关注领域（推荐系统、搜索、广告中的技术进展）没有直接关联，也不涉及LLM技术、Transformer架构或异构数据处理等关键技术方向。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-10-21 16:22:07
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2510.18774v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2510.18774v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CL</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    AI is rapidly transforming journalism, but the extent of its use in published newspaper articles remains unclear. We address this gap by auditing a large-scale dataset of 186K articles from online editions of 1.5K American newspapers published in the summer of 2025. Using Pangram, a state-of-the-art AI detector, we discover that approximately 9% of newly-published articles are either partially or fully AI-generated. This AI use is unevenly distributed, appearing more frequently in smaller, local outlets, in specific topics such as weather and technology, and within certain ownership groups. We also analyze 45K opinion pieces from Washington Post, New York Times, and Wall Street Journal, finding that they are 6.4 times more likely to contain AI-generated content than news articles from the same publications, with many AI-flagged op-eds authored by prominent public figures. Despite this prevalence, we find that AI use is rarely disclosed: a manual audit of 100 AI-flagged articles found only five disclosures of AI use. Overall, our audit highlights the immediate need for greater transparency and updated editorial standards regarding the use of AI in journalism to maintain public trust.
                </div>
            </details>
    </div>
</div>
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.18724v1" target="_blank" rel="noopener noreferrer">
                代码切换语音中语言平衡的自适应调整
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>1/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            Adapting Language Balance in Code-Switching Speech
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Enes Yavuz Ugan, Ngoc-Quan Pham, Alexander Waibel
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于语音处理中的代码切换技术，属于语音领域的特定应用。虽然代码切换涉及多语言处理，但论文明确聚焦于语音而非文本，且没有显示出与推荐系统、搜索或广告的潜在应用关联。该主题属于语音处理领域，不在当前关注的技术范畴内。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-10-21 15:23:55
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2510.18724v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2510.18724v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CL</span><span class="category-tag">cs.LG</span><span class="category-tag">cs.SD</span><span class="category-tag">eess.AS</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Despite achieving impressive results on standard benchmarks, large foundational models still struggle against code-switching test cases. When data scarcity cannot be used as the usual justification for poor performance, the reason may lie in the infrequent occurrence of code-switched moments, where the embedding of the second language appears subtly. Instead of expecting the models to learn this infrequency on their own, it might be beneficial to provide the training process with labels. Evaluating model performance on code-switching data requires careful localization of code-switching points where recognition errors are most consequential, so that the analysis emphasizes mistakes occurring at those moments. Building on this observation, we leverage the difference between the embedded and the main language to highlight those code-switching points and thereby emphasize learning at those locations. This simple yet effective differentiable surrogate mitigates context bias during generation -- the central challenge in code-switching -- thereby improving the model's robustness. Our experiments with Arabic and Chinese-English showed that the models are able to predict the switching places more correctly, reflected by the reduced substitution error.
                </div>
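                 <div class="mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    <p class="mb-2">以下是按“在语码切换点放大训练损失权重”思路写的简化示意：假设已有逐 token 的交叉熵损失与语言标签，在语言发生切换的位置赋予更高权重。加权方式为说明性假设，并非论文中可微替代目标的原始实现。</p>
                    <pre class="overflow-x-auto"><code># Sketch of upweighting the training loss at code-switching points, assuming
# per-token cross-entropy losses and a language tag per token are available.
def switch_weighted_loss(token_losses: list[float], lang_tags: list[str],
                         switch_weight: float = 3.0) -> float:
    weights = []
    for i, tag in enumerate(lang_tags):
        is_switch = i > 0 and tag != lang_tags[i - 1]   # language changes here
        weights.append(switch_weight if is_switch else 1.0)
    return sum(w * l for w, l in zip(weights, token_losses)) / sum(weights)

# Example: the third token switches from Chinese to English and gets more weight.
print(switch_weighted_loss([0.2, 0.3, 1.5, 0.4], ["zh", "zh", "en", "en"]))</code></pre>
                 </div>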
            </details>
    </div>
</div>
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.18629v1" target="_blank" rel="noopener noreferrer">
                基于超声舌运动学的动态模型参数
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>1/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            Dynamical model parameters from ultrasound tongue kinematics
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Sam Kirkham, Patrycja Strycharczuk
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">这篇论文涉及从超声舌运动学数据中提取动态模型参数，这属于生物医学工程和语音处理的交叉领域。该主题与推荐系统、搜索或广告的核心技术领域完全无关，也不涉及LLM、Transformer架构或异构数据建模等任何相关技术。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-10-21 13:34:13
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2510.18629v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2510.18629v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CL</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    The control of speech can be modelled as a dynamical system in which articulators are driven toward target positions. These models are typically evaluated using fleshpoint data, such as electromagnetic articulography (EMA), but recent methodological advances make ultrasound imaging a promising alternative. We evaluate whether the parameters of a linear harmonic oscillator can be reliably estimated from ultrasound tongue kinematics and compare these with parameters estimated from simultaneously-recorded EMA data. We find that ultrasound and EMA yield comparable dynamical parameters, while mandibular short tendon tracking also adequately captures jaw motion. This supports using ultrasound kinematics to evaluate dynamical articulatory models.
                </div>
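                 <div class="mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    <p class="mb-2">下面是一个从采样运动学数据估计线性阻尼振子参数（刚度、阻尼、目标位置）的通用示意：先用有限差分近似速度与加速度，再做最小二乘回归。假设可用 numpy；该示意只说明此类动力学参数估计的一般思路，并非作者的实际估计流程。</p>
                    <pre class="overflow-x-auto"><code># Fit x''(t) = -k (x(t) - T) - b x'(t) to sampled kinematics via least squares.
import numpy as np

def estimate_oscillator(x: np.ndarray, dt: float):
    v = np.gradient(x, dt)                 # velocity from finite differences
    a = np.gradient(v, dt)                 # acceleration
    # Regress a on [-x, -v, 1]: a = -k*x - b*v + c, where c = k*T.
    A = np.column_stack([-x, -v, np.ones_like(x)])
    (k, b, c), *_ = np.linalg.lstsq(A, a, rcond=None)
    target = c / k if k != 0 else float("nan")
    return k, b, target

# Synthetic check: simulate a damped approach to a target, then recover k, b, T.
dt, k_true, b_true, T_true = 0.005, 80.0, 12.0, 1.0
xs, vs = [0.0], [0.0]
for _ in range(2000):
    acc = -k_true * (xs[-1] - T_true) - b_true * vs[-1]
    vs.append(vs[-1] + acc * dt)
    xs.append(xs[-1] + vs[-1] * dt)
print(estimate_oscillator(np.array(xs), dt))</code></pre>
                 </div>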
            </details>
    </div>
</div>
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.18582v1" target="_blank" rel="noopener noreferrer">
                超越显性表达：用于社交媒体非人化检测的双语数据集
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>1/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            Beyond the Explicit: A Bilingual Dataset for Dehumanization Detection in Social Media
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Dennis Assenmacher, Paloma Piot, Katarina Laken, David Jurgens, Claudia Wagner
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于社交媒体中的非人化检测，这属于内容安全和社会计算领域，与推荐系统、搜索或广告的核心技术进展无关。论文内容涉及伦理和社会问题检测，属于明确排除的无关主题范畴，对当前关注的技术方向没有直接或间接的应用价值。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-10-21 12:35:30
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2510.18582v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2510.18582v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CL</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Digital dehumanization, although a critical issue, remains largely overlooked within the field of computational linguistics and Natural Language Processing. The prevailing approach in current research concentrates primarily on a single aspect of dehumanization, identifying overtly negative statements as its core marker. This focus, while crucial for understanding harmful online communications, inadequately addresses the broader spectrum of dehumanization. Specifically, it overlooks the subtler forms of dehumanization that, despite not being overtly offensive, still perpetuate harmful biases against marginalized groups in online interactions. These subtler forms can insidiously reinforce negative stereotypes and biases without explicit offensiveness, making them harder to detect yet equally damaging. Recognizing this gap, we use different sampling methods to collect a theory-informed bilingual dataset from Twitter and Reddit. Using crowdworkers and experts to annotate 16,000 instances on a document- and span-level, we show that our dataset covers the different dimensions of dehumanization. This dataset serves as both a training resource for machine learning models and a benchmark for evaluating future dehumanization detection techniques. To demonstrate its effectiveness, we fine-tune ML models on this dataset, achieving performance that surpasses state-of-the-art models in zero and few-shot in-context settings.
                </div>
            </details>
    </div>
</div>
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.18556v1" target="_blank" rel="noopener noreferrer">
                构建临床大语言模型的可信度：偏见分析与数据集透明度
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>1/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            Building Trust in Clinical LLMs: Bias Analysis and Dataset Transparency
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Svetlana Maslenkova, Clement Christophe, Marco AF Pimentel, Tathagata Raha, Muha...
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于临床医学领域的LLM应用，这属于明确的无关主题（医疗领域特定应用）。论文讨论的偏见分析和数据集透明度属于公平性、伦理等非技术性话题，与搜索、推荐、广告等核心领域的技术进展无关。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-10-21 12:08:39
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2510.18556v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2510.18556v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CL</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Large language models offer transformative potential for healthcare, yet their responsible and equitable development depends critically on a deeper understanding of how training data characteristics influence model behavior, including the potential for bias. Current practices in dataset curation and bias assessment often lack the necessary transparency, creating an urgent need for comprehensive evaluation frameworks to foster trust and guide improvements. In this study, we present an in-depth analysis of potential downstream biases in clinical language models, with a focus on differential opioid prescription tendencies across diverse demographic groups, such as ethnicity, gender, and age. As part of this investigation, we introduce HC4: Healthcare Comprehensive Commons Corpus, a novel and extensively curated pretraining dataset exceeding 89 billion tokens. Our evaluation leverages both established general benchmarks and a novel, healthcare-specific methodology, offering crucial insights to support fairness and safety in clinical AI applications.
                </div>
            </details>
    </div>
</div>
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.18475v1" target="_blank" rel="noopener noreferrer">
                DART：一个用于临床自然语言处理的意大利语监管药物文档结构化数据集
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>1/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            DART: A Structured Dataset of Regulatory Drug Documents in Italian for Clinical NLP
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Mariano Barone, Antonio Laudante, Giuseppe Riccio, Antonio Romano, Marco Postigl...
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于临床领域的药物文档处理，属于医学/生物领域的特定应用，与推荐系统、搜索或广告的核心技术无关。论文内容涉及临床NLP和药物监管文档，这些都属于明确的无关主题范畴，没有任何潜在的应用于RecSys/Search/Ads的技术价值。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-10-21 09:53:17
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2510.18475v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2510.18475v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CL</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    The extraction of pharmacological knowledge from regulatory documents has become a key focus in biomedical natural language processing, with applications ranging from adverse event monitoring to AI-assisted clinical decision support. However, research in this field has predominantly relied on English-language corpora such as DrugBank, leaving a significant gap in resources tailored to other healthcare systems. To address this limitation, we introduce DART (Drug Annotation from Regulatory Texts), the first structured corpus of Italian Summaries of Product Characteristics derived from the official repository of the Italian Medicines Agency (AIFA). The dataset was built through a reproducible pipeline encompassing web-scale document retrieval, semantic segmentation of regulatory sections, and clinical summarization using a few-shot-tuned large language model with low-temperature decoding. DART provides structured information on key pharmacological domains such as indications, adverse drug reactions, and drug-drug interactions. To validate its utility, we implemented an LLM-based drug interaction checker that leverages the dataset to infer clinically meaningful interactions. Experimental results show that instruction-tuned LLMs can accurately infer potential interactions and their clinical implications when grounded in the structured textual fields of DART. We publicly release our code on GitHub: https://github.com/PRAISELab-PicusLab/DART.
                </div>
            </details>
    </div>
</div>
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.18468v1" target="_blank" rel="noopener noreferrer">
                IMB：一个用于问答的意大利医学基准
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>1/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            IMB: An Italian Medical Benchmark for Question Answering
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Antonio Romano, Giuseppe Riccio, Mariano Barone, Marco Postiglione, Vincenzo Mos...
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于医学领域的问答基准测试，属于明确的医学领域特定应用。这与我的关注点完全无关，我的关注点明确排除了医学、生物学、化学、物理学或其他领域特定应用，以及纯粹的评估基准或NLP中心主题。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-10-21 09:45:59
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2510.18468v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2510.18468v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CL</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Online medical forums have long served as vital platforms where patients seek professional healthcare advice, generating vast amounts of valuable knowledge. However, the informal nature and linguistic complexity of forum interactions pose significant challenges for automated question answering systems, especially when dealing with non-English languages. We present two comprehensive Italian medical benchmarks: <strong>IMB-QA</strong>, containing 782,644 patient-doctor conversations from 77 medical categories, and <strong>IMB-MCQA</strong>, comprising 25,862 multiple-choice questions from medical specialty examinations. We demonstrate how Large Language Models (LLMs) can be leveraged to improve the clarity and consistency of medical forum data while retaining their original meaning and conversational style, and compare a variety of LLM architectures on both open and multiple-choice question answering tasks. Our experiments with Retrieval Augmented Generation (RAG) and domain-specific fine-tuning reveal that specialized adaptation strategies can outperform larger, general-purpose models in medical question answering tasks. These findings suggest that effective medical AI systems may benefit more from domain expertise and efficient information retrieval than from increased model scale. We release both datasets and evaluation frameworks in our GitHub repository to support further research on multilingual medical question answering: https://github.com/PRAISELab-PicusLab/IMB.
                </div>
            </details>
    </div>
</div>
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.18466v1" target="_blank" rel="noopener noreferrer">
                CEFR标注的WordNet：基于LLM的面向语言学习的熟练度引导语义数据库
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>1/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            CEFR-Annotated WordNet: LLM-Based Proficiency-Guided Semantic Database for Language Learning
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Masato Kikuchi, Masatsugu Ono, Toshioki Soga, Tetsu Tanabe, Tadachika Ozono
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于语言学习领域的特定应用，构建了一个基于CEFR标准的语义数据库，这属于教育技术领域。虽然涉及LLM技术，但应用场景与推荐系统、搜索或广告完全无关，且没有展示任何在RecSys/Search/Ads领域的潜在应用价值。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-10-21 09:42:48
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2510.18466v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2510.18466v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CL</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Although WordNet is a valuable resource owing to its structured semantic networks and extensive vocabulary, its fine-grained sense distinctions can be challenging for second-language learners. To address this, we developed a WordNet annotated with the Common European Framework of Reference for Languages (CEFR), integrating its semantic networks with language-proficiency levels. We automated this process using a large language model to measure the semantic similarity between sense definitions in WordNet and entries in the English Vocabulary Profile Online. To validate our method, we constructed a large-scale corpus containing both sense and CEFR-level information from our annotated WordNet and used it to develop contextual lexical classifiers. Our experiments demonstrate that models fine-tuned on our corpus perform comparably to those trained on gold-standard annotations. Furthermore, by combining our corpus with the gold-standard data, we developed a practical classifier that achieves a Macro-F1 score of 0.81, indicating the high accuracy of our annotations. Our annotated WordNet, corpus, and classifiers are publicly available to help bridge the gap between natural language processing and language education, thereby facilitating more effective and efficient language learning.
                </div>
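                 <div class="mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    <p class="mb-2">以下示意展示“将 WordNet 释义与 EVP 词条做相似度匹配、继承其 CEFR 等级”的基本思路：论文使用 LLM 判断语义相似度，这里用 difflib 作为粗糙替代，示例词条亦为虚构，仅作说明。</p>
                    <pre class="overflow-x-auto"><code># Toy CEFR assignment: match a WordNet sense definition against EVP entries.
from difflib import SequenceMatcher

EVP_ENTRIES = [  # (word, CEFR level, definition) -- hypothetical examples
    ("bank", "A2", "an organization where people keep their money"),
    ("bank", "B2", "a raised area of land along the side of a river"),
]

def similarity(a: str, b: str) -> float:
    return SequenceMatcher(None, a.lower(), b.lower()).ratio()

def assign_cefr(word: str, sense_definition: str) -> str:
    candidates = [(level, similarity(sense_definition, definition))
                  for w, level, definition in EVP_ENTRIES if w == word]
    if not candidates:
        return "unknown"
    return max(candidates, key=lambda c: c[1])[0]   # level of the closest entry

print(assign_cefr("bank", "sloping land beside a body of water"))</code></pre>
                 </div>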
            </details>
    </div>
</div>
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.18439v1" target="_blank" rel="noopener noreferrer">
                基于视觉信号还是猜测？视觉信号在检测手语翻译中的幻觉现象
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>1/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            Grounding or Guessing? Visual Signals for Detecting Hallucinations in Sign Language Translation
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Yasser Hamidullah, Koel Dutta Chowdury, Yusser Al-Ghussin, Shakib Yazdani, Cenne...
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于手语翻译中的幻觉检测，这属于特定领域（手语）的翻译质量问题评估。虽然涉及视觉信号，但与推荐系统、搜索或广告中的异构数据建模没有直接关联。该工作主要关注翻译准确性的技术问题，而非推荐/搜索/广告领域所需的核心技术或架构创新。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-10-21 09:13:46
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2510.18439v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2510.18439v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CL</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Hallucination, where models generate fluent text unsupported by visual evidence, remains a major flaw in vision-language models and is particularly critical in sign language translation (SLT). In SLT, meaning depends on precise grounding in video, and gloss-free models are especially vulnerable because they map continuous signer movements directly into natural language without intermediate gloss supervision that serves as alignment. We argue that hallucinations arise when models rely on language priors rather than visual input. To capture this, we propose a token-level reliability measure that quantifies how much the decoder uses visual information. Our method combines feature-based sensitivity, which measures internal changes when video is masked, with counterfactual signals, which capture probability differences between clean and altered video inputs. These signals are aggregated into a sentence-level reliability score, providing a compact and interpretable measure of visual grounding. We evaluate the proposed measure on two SLT benchmarks (PHOENIX-2014T and CSL-Daily) with both gloss-based and gloss-free models. Our results show that reliability predicts hallucination rates, generalizes across datasets and architectures, and decreases under visual degradations. Beyond these quantitative trends, we also find that reliability distinguishes grounded tokens from guessed ones, allowing risk estimation without references; when combined with text-based signals (confidence, perplexity, or entropy), it further improves hallucination risk estimation. Qualitative analysis highlights why gloss-free models are more susceptible to hallucinations. Taken together, our findings establish reliability as a practical and reusable tool for diagnosing hallucinations in SLT, and lay the groundwork for more robust hallucination detection in multimodal generation.
                </div>
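                 <div class="mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    <p class="mb-2">下面用几行 Python 勾勒“token 级视觉依赖度”打分的思路：比较有无视频输入时各 token 的生成概率差（反事实信号），可选地再与特征敏感度信号平均，最后聚合为句子级分数。函数与权重均为说明性假设，并非论文实现。</p>
                    <pre class="overflow-x-auto"><code># Illustrative token-level reliability from clean vs. video-masked probabilities.
def token_reliability(p_clean: list[float], p_masked: list[float]) -> list[float]:
    """Counterfactual signal: how much each token's probability drops when the
    visual input is masked. A larger drop suggests the token is visually grounded."""
    return [max(pc - pm, 0.0) for pc, pm in zip(p_clean, p_masked)]

def sentence_reliability(p_clean, p_masked, feat_sensitivity=None) -> float:
    """Aggregate token-level scores into one sentence-level reliability score."""
    scores = token_reliability(p_clean, p_masked)
    if feat_sensitivity is not None:   # optionally mix in a feature-based signal
        scores = [0.5 * c + 0.5 * f for c, f in zip(scores, feat_sensitivity)]
    return sum(scores) / len(scores) if scores else 0.0

# Tokens whose probability barely changes without video look like guesses.
print(sentence_reliability([0.9, 0.8, 0.7], [0.2, 0.75, 0.1]))</code></pre>
                 </div>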
            </details>
    </div>
</div>
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.18374v1" target="_blank" rel="noopener noreferrer">
                基于公平性提示微调实现面向第二语言使用者的公平自动语音识别
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>1/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            Towards Fair ASR For Second Language Speakers Using Fairness Prompted Finetuning
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Monorama Swain, Bubai Maji, Jagabandhu Mishra, Markus Schedl, Anders Søgaard, Je...
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于自动语音识别(ASR)系统的公平性，这属于语音技术领域，与推荐系统、搜索或广告的核心技术无关。论文讨论的公平性主题明确属于被排除的无关话题范畴，且没有显示出在推荐、搜索或广告领域的潜在应用价值。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-10-21 07:45:21
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2510.18374v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2510.18374v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CL</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    In this work, we address the challenge of building fair English ASR systems for second-language speakers. Our analysis of widely used ASR models, Whisper and Seamless-M4T, reveals large fluctuations in word error rate (WER) across 26 accent groups, indicating significant fairness gaps. To mitigate this, we propose fairness-prompted finetuning with lightweight adapters, incorporating Spectral Decoupling (SD), Group Distributionally Robust Optimization (Group-DRO), and Invariant Risk Minimization (IRM). Our proposed fusion of traditional empirical risk minimization (ERM) with cross-entropy and fairness-driven objectives (SD, Group DRO, and IRM) enhances fairness across accent groups while maintaining overall recognition accuracy. In terms of macro-averaged word error rate, our approach achieves relative improvements of 58.7% and 58.5% over the large pretrained Whisper and SeamlessM4T models, and of 9.7% and 7.8% over the same models finetuned with standard empirical risk minimization with cross-entropy loss.
                </div>
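                 <div class="mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    <p class="mb-2">以下是将 ERM 与 Group-DRO 风格的“最差口音组”目标融合的简化示意：对各口音组的平均损失做 softmax 加权以突出最难的组，再与按样本数加权的 ERM 损失线性组合。权重与具体形式为说明性假设（论文还叠加了 Spectral Decoupling 与 IRM 项），并非其原始实现。</p>
                    <pre class="overflow-x-auto"><code># Fuse an ERM objective with a soft worst-group (Group-DRO-style) term,
# given average losses and sample counts per accent group.
import math

def fused_objective(group_losses: dict[str, float], group_sizes: dict[str, int],
                    dro_weight: float = 0.5, temperature: float = 1.0) -> float:
    total = sum(group_sizes.values())
    # Standard ERM: size-weighted average loss over all groups.
    erm = sum(group_losses[g] * group_sizes[g] for g in group_losses) / total
    # Soft worst-group term: softmax over group losses upweights the hardest groups.
    weights = {g: math.exp(group_losses[g] / temperature) for g in group_losses}
    z = sum(weights.values())
    dro = sum(weights[g] / z * group_losses[g] for g in group_losses)
    return (1 - dro_weight) * erm + dro_weight * dro

losses = {"native": 0.4, "accent_A": 1.2, "accent_B": 0.9}
sizes = {"native": 800, "accent_A": 120, "accent_B": 80}
print(fused_objective(losses, sizes))</code></pre>
                 </div>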
            </details>
    </div>
</div>
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.18339v1" target="_blank" rel="noopener noreferrer">
                ECG-LLM：面向心电图的领域特定大语言模型的训练与评估
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>1/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            ECG-LLM -- training and evaluation of domain-specific large language models for electrocardiography
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Lara Ahrens, Wilhelm Haverkamp, Nils Strodthoff
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于医学领域的心电图（ECG）应用，属于明确的医疗领域特定应用。这完全落在无关主题范围内，与推荐系统、搜索、广告或相关使能技术没有任何潜在关联。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-10-21 06:45:38
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2510.18339v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2510.18339v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CL</span><span class="category-tag">cs.LG</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Domain-adapted open-weight large language models (LLMs) offer promising healthcare applications, from queryable knowledge bases to multimodal assistants, with the crucial advantage of local deployment for privacy preservation. However, optimal adaptation strategies, evaluation methodologies, and performance relative to general-purpose LLMs remain poorly characterized. We investigated these questions in electrocardiography, an important area of cardiovascular medicine, by finetuning open-weight models on domain-specific literature and implementing a multi-layered evaluation framework comparing finetuned models, retrieval-augmented generation (RAG), and Claude Sonnet 3.7 as a representative general-purpose model. Finetuned Llama 3.1 70B achieved superior performance on multiple-choice evaluations and automatic text metrics, ranking second to Claude 3.7 in LLM-as-a-judge assessments. Human expert evaluation favored Claude 3.7 and RAG approaches for complex queries. Finetuned models significantly outperformed their base counterparts across nearly all evaluation modes. Our findings reveal substantial performance heterogeneity across evaluation methodologies, underscoring assessment complexity. Nevertheless, domain-specific adaptation through finetuning and RAG achieves competitive performance with proprietary models, supporting the viability of privacy-preserving, locally deployable clinical solutions.
                </div>
            </details>
    </div>
</div>
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.18333v1" target="_blank" rel="noopener noreferrer">
                立场：大语言模型水印技术应协调利益相关者动机以实现实际应用
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>1/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            Position: LLM Watermarking Should Align Stakeholders' Incentives for Practical Adoption
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Yepeng Liu, Xuandong Zhao, Dawn Song, Gregory W. Wornell, Yuheng Bu
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文聚焦于LLM水印技术，这属于安全、隐私和伦理范畴，明确列在无关主题中。水印技术主要用于内容溯源和版权保护，与推荐系统、搜索或广告的核心技术进展、架构改进或直接应用无关，因此完全不相关。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-10-21 06:34:51
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2510.18333v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2510.18333v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CR</span><span class="category-tag">cs.CL</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Despite progress in watermarking algorithms for large language models (LLMs), real-world deployment remains limited. We argue that this gap stems from misaligned incentives among LLM providers, platforms, and end users, which manifest as four key barriers: competitive risk, detection-tool governance, robustness concerns and attribution issues. We revisit three classes of watermarking through this lens. <em>Model watermarking</em> naturally aligns with LLM provider interests, yet faces new challenges in open-source ecosystems. <em>LLM text watermarking</em> offers modest provider benefit when framed solely as an anti-misuse tool, but can gain traction in narrowly scoped settings such as dataset de-contamination or user-controlled provenance. <em>In-context watermarking</em> (ICW) is tailored for trusted parties, such as conference organizers or educators, who embed hidden watermarking instructions into documents. If a dishonest reviewer or student submits this text to an LLM, the output carries a detectable watermark indicating misuse. This setup aligns incentives: users experience no quality loss, trusted parties gain a detection tool, and LLM providers remain neutral by simply following watermark instructions. We advocate for a broader exploration of incentive-aligned methods, with ICW as an example, in domains where trusted parties need reliable tools to detect misuse. More broadly, we distill design principles for incentive-aligned, domain-specific watermarking and outline future research directions. Our position is that the practical adoption of LLM watermarking requires aligning stakeholder incentives in targeted application domains and fostering active community engagement.
                </div>
            </details>
    </div>
</div>
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.18304v1" target="_blank" rel="noopener noreferrer">
                图像分辨率对生物医学多模态大语言模型的影响
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>1/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            The Impact of Image Resolution on Biomedical Multimodal Large Language Models
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Liangyu Chen, James Burgess, Jeffrey J Nirschl, Orr Zohar, Serena Yeung-Levy
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文明确聚焦于生物医学领域的多模态大语言模型，这属于被明确排除的医学/生物学特定领域应用。虽然涉及多模态模型，但缺乏与推荐系统、搜索或广告的潜在关联，且生物医学应用属于明确的无关主题范畴。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-10-21 05:19:43
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2510.18304v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2510.18304v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CV</span><span class="category-tag">cs.CL</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Imaging technologies are fundamental to biomedical research and modern medicine, requiring analysis of high-resolution images across various modalities. While multimodal large language models (MLLMs) show promise for biomedical image analysis, most are designed for low-resolution images from general-purpose datasets, risking critical information loss. We investigate how image resolution affects MLLM performance in biomedical applications and demonstrate that: (1) native-resolution training and inference significantly improve performance across multiple tasks, (2) misalignment between training and inference resolutions severely degrades performance, and (3) mixed-resolution training effectively mitigates misalignment and balances computational constraints with performance requirements. Based on these findings, we recommend prioritizing native-resolution inference and mixed-resolution datasets to optimize biomedical MLLMs for transformative impact in scientific research and clinical applications.
                </div>
            </details>
    </div>
</div>
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.18289v1" target="_blank" rel="noopener noreferrer">
                Food4All：一个集成营养元数据的实时免费食物发现多智能体框架
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>1/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            Food4All: A Multi-Agent Framework for Real-time Free Food Discovery with Integrated Nutritional Metadata
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Zhengqing Yuan, Yiyang Li, Weixiang Sun, Zheyuan Zhang, Kaiwen Shi, Keerthiram M...
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于特定领域的食物发现应用，涉及多智能体系统和营养元数据，与推荐系统、搜索或广告的核心领域进展无关。它属于医疗/健康领域的特定应用，不在当前关注的技术范围内。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-10-21 04:35:02
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2510.18289v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2510.18289v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CL</span><span class="category-tag">cs.CY</span><span class="category-tag">cs.MA</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Food insecurity remains a persistent public health emergency in the United States, tightly interwoven with chronic disease, mental illness, and opioid misuse. Yet despite the existence of thousands of food banks and pantries, access remains fragmented: 1) current retrieval systems depend on static directories or generic search engines, which provide incomplete and geographically irrelevant results; 2) LLM-based chatbots offer only vague nutritional suggestions and fail to adapt to real-world constraints such as time, mobility, and transportation; and 3) existing food recommendation systems optimize for culinary diversity but overlook survival-critical needs of food-insecure populations, including immediate proximity, verified availability, and contextual barriers. These limitations risk leaving the most vulnerable individuals, those experiencing homelessness, addiction, or digital illiteracy, unable to access urgently needed resources. To address this, we introduce Food4All, the first multi-agent framework explicitly designed for real-time, context-aware free food retrieval. Food4All unifies three innovations: 1) heterogeneous data aggregation across official databases, community platforms, and social media to provide a continuously updated pool of food resources; 2) a lightweight reinforcement learning algorithm trained on curated cases to optimize for both geographic accessibility and nutritional correctness; and 3) an online feedback loop that dynamically adapts retrieval policies to evolving user needs. By bridging information acquisition, semantic analysis, and decision support, Food4All delivers nutritionally annotated guidance at the point of need. This framework establishes an urgent step toward scalable, equitable, and intelligent systems that directly support populations facing food insecurity and its compounding health risks.
                </div>
            </details>
    </div>
</div>
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.18288v1" target="_blank" rel="noopener noreferrer">
                BrailleLLM：面向盲文领域任务的大语言模型盲文指令微调
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>1/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            BrailleLLM: Braille Instruction Tuning with Large Language Models for Braille Domain Tasks
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Tianyuan Huang, Zepeng Zhu, Hangdi Xing, Zirui Shao, Zhi Yu, Chaoxiong Yang, Jia...
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于盲文这一特定领域的应用，属于辅助技术领域，与推荐系统、搜索或广告的核心技术进展无关。论文内容涉及为特定用户群体（视障人士）定制LLM应用，这属于领域特定的应用场景，不在当前关注的技术范畴内。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-10-21 04:33:05
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2510.18288v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2510.18288v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CL</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Braille plays a vital role in education and information accessibility for visually impaired individuals. However, Braille information processing faces challenges such as data scarcity and ambiguities in mixed-text contexts. We construct English and Chinese Braille Mixed Datasets (EBMD/CBMD) with mathematical formulas to support diverse Braille domain research, and propose a syntax tree-based augmentation method tailored for Braille data. To address the underperformance of traditional fine-tuning methods in Braille-related tasks, we investigate Braille Knowledge-Based Fine-Tuning (BKFT), which reduces the learning difficulty of Braille contextual features. BrailleLLM employs BKFT via instruction tuning to achieve unified Braille translation, formula-to-Braille conversion, and mixed-text translation. Experiments demonstrate that BKFT achieves significant performance improvements over conventional fine-tuning in Braille translation scenarios. Our open-sourced datasets and methodologies establish a foundation for low-resource multilingual Braille research.
                </div>
            </details>
    </div>
</div>
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.18837v1" target="_blank" rel="noopener noreferrer">
                FedDEAP：面向多领域联邦学习的自适应双提示调优
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>1/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            FedDEAP: Adaptive Dual-Prompt Tuning for Multi-Domain Federated Learning
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Yubin Zheng, Pak-Hei Yeung, Jing Xia, Tianjie Ju, Peng Tang, Weidong Qiu, Jagath...
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文涉及联邦学习（Federated Learning），这属于明确列出的无关主题。虽然提到了提示调优（Prompt Tuning）这一LLM技术，但核心焦点是多领域联邦学习框架，而非在推荐系统、搜索或广告中的直接应用或技术赋能。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-10-21 17:32:44
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2510.18837v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2510.18837v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Federated learning (FL) enables multiple clients to collaboratively train machine learning models without exposing local data, balancing performance and privacy. However, domain shift and label heterogeneity across clients often hinder the generalization of the aggregated global model. Recently, large-scale vision-language models like CLIP have shown strong zero-shot classification capabilities, raising the question of how to effectively fine-tune CLIP across domains in a federated setting. In this work, we propose an adaptive federated prompt tuning framework, FedDEAP, to enhance CLIP's generalization in multi-domain scenarios. Our method includes the following three key components: (1) To mitigate the loss of domain-specific information caused by label-supervised tuning, we disentangle semantic and domain-specific features in images by using semantic and domain transformation networks with unbiased mappings; (2) To preserve domain-specific knowledge during global prompt aggregation, we introduce a dual-prompt design with a global semantic prompt and a local domain prompt to balance shared and personalized information; (3) To maximize the inclusion of semantic and domain information from images in the generated text features, we align textual and visual representations under the two learned transformations to preserve semantic and domain consistency. Theoretical analysis and extensive experiments on four datasets demonstrate the effectiveness of our method in enhancing the generalization of CLIP for federated image recognition across multiple domains.
                </div>
            </details>
    </div>
</div>
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.18819v1" target="_blank" rel="noopener noreferrer">
                一种用于增强结核病及症状检测的可解释混合人工智能框架
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>1/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            An Explainable Hybrid AI Framework for Enhanced Tuberculosis and Symptom Detection
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Neel Patel, Alexander Wong, Ashkan Ebadi
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于医疗领域的结核病检测应用，属于明确的医疗/生物学特定领域，与推荐系统、搜索或广告完全无关。论文标题中提到的混合AI框架和可解释性技术虽然具有技术价值，但应用场景被严格限定在医疗诊断领域，没有任何与RecSys/Search/Ads相关的潜在应用。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-10-21 17:18:55
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2510.18819v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2510.18819v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CV</span><span class="category-tag">cs.AI</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Tuberculosis remains a critical global health issue, particularly in resource-limited and remote areas. Early detection is vital for treatment, yet the lack of skilled radiologists underscores the need for artificial intelligence (AI)-driven screening tools. Developing reliable AI models is challenging due to the necessity for large, high-quality datasets, which are costly to obtain. To tackle this, we propose a teacher–student framework which enhances both disease and symptom detection on chest X-rays by integrating two supervised heads and a self-supervised head. Our model achieves an accuracy of 98.85% for distinguishing between COVID-19, tuberculosis, and normal cases, and a macro-F1 score of 90.09% for multilabel symptom detection, significantly outperforming baselines. The explainability assessments also show the model bases its predictions on relevant anatomical features, demonstrating promise for deployment in clinical screening and triage settings.
                </div>
            </details>
    </div>
</div>
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.18781v1" target="_blank" rel="noopener noreferrer">
                叛逆学生：一种用于高光谱异常检测中背景特征增强的互补学习框架
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>1/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            Rebellious Student: A Complementary Learning Framework for Background Feature Enhancement in Hyperspectral Anomaly Detection
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Wenping Jin, Yuyang Tang, Li Zhu, Fei Guo
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">这篇论文专注于高光谱异常检测，这是一个纯粹的计算机视觉/遥感应用领域，与推荐系统、搜索或广告没有任何关联。论文中提到的背景特征增强和互补学习框架是专门为高光谱图像分析设计的，在推荐、搜索或广告领域没有明显的应用潜力。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-10-21 16:31:56
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2510.18781v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2510.18781v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    A recent class of hyperspectral anomaly detection methods that can be trained once on background datasets and then universally deployed -- without per-scene retraining or parameter tuning -- has demonstrated remarkable efficiency and robustness. Building upon this paradigm, we focus on the integration of spectral and spatial cues and introduce a novel "Rebellious Student" framework for complementary feature learning. Unlike conventional teacher-student paradigms driven by imitation, our method intentionally trains the spatial branch to diverge from the spectral teacher, thereby learning complementary spatial patterns that the teacher fails to capture. A two-stage learning strategy is adopted: (1) a spectral enhancement network is first trained via reverse distillation to obtain robust background spectral representations; and (2) a spatial network -- the rebellious student -- is subsequently optimized using decorrelation losses that enforce feature orthogonality while maintaining reconstruction fidelity to avoid irrelevant noise. Once trained, the framework enhances both spectral and spatial background features, enabling parameter-free and training-free anomaly detection when paired with conventional detectors. Extensive experiments on the HAD100 benchmark show substantial improvements over several established baselines with minimal computational overhead, confirming the effectiveness and generality of the proposed complementary learning paradigm. Our code is publicly available at https://github.com/xjpp2016/FERS.
                </div>
            </details>
    </div>
</div>
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.18775v1" target="_blank" rel="noopener noreferrer">
                UltraGen：基于分层注意力的高分辨率视频生成
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>1/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            UltraGen: High-Resolution Video Generation with Hierarchical Attention
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Teng Hu, Jiangning Zhang, Zihan Su, Ran Yi
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于高分辨率视频生成技术，属于纯粹的视觉内容生成领域，与推荐系统、搜索或广告的核心技术无关。分层注意力机制虽然本身是Transformer相关技术，但论文的应用场景是视频生成，没有显示出在RecSys/Search/Ads领域的潜在应用价值。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-10-21 16:23:21
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2510.18775v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2510.18775v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Recent advances in video generation have made it possible to produce visually compelling videos, with wide-ranging applications in content creation, entertainment, and virtual reality. However, most existing diffusion transformer based video generation models are limited to low-resolution outputs (≤720P) due to the quadratic computational complexity of the attention mechanism with respect to the output width and height. This computational bottleneck makes native high-resolution video generation (1080P/2K/4K) impractical for both training and inference. To address this challenge, we present UltraGen, a novel video generation framework that enables i) efficient and ii) end-to-end native high-resolution video synthesis. Specifically, UltraGen features a hierarchical dual-branch attention architecture based on global-local attention decomposition, which decouples full attention into a local attention branch for high-fidelity regional content and a global attention branch for overall semantic consistency. We further propose a spatially compressed global modeling strategy to efficiently learn global dependencies, and a hierarchical cross-window local attention mechanism to reduce computational costs while enhancing information flow across different local windows. Extensive experiments demonstrate that UltraGen can effectively scale pre-trained low-resolution video models to 1080P and even 4K resolution for the first time, outperforming existing state-of-the-art methods and super-resolution based two-stage pipelines in both qualitative and quantitative evaluations.
                </div>
            </details>
    </div>
</div>
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.18773v1" target="_blank" rel="noopener noreferrer">
                基于微调地理空间基础模型的城市热岛检测与模拟及其对微气候影响的预测
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>1/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            Detection and Simulation of Urban Heat Islands Using a Fine-Tuned Geospatial Foundation Model for Microclimate Impact Prediction
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Jannis Fleckenstein, David Kreismann, Tamara Rosemary Govindasamy, Thomas Brunsc...
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于地理空间建模和城市气候分析，与推荐系统、搜索或广告领域完全无关。论文内容涉及城市热岛检测、微气候预测等环境科学应用，没有任何技术要素可以应用于我的核心关注领域。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-10-21 16:21:15
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2510.18773v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2510.18773v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CV</span><span class="category-tag">68T07</span><span class="category-tag">I.2.6; I.5.4</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    As urbanization and climate change progress, urban heat island effects are becoming more frequent and severe. To formulate effective mitigation plans, cities require detailed air temperature data, yet conventional machine learning models with limited data often produce inaccurate predictions, particularly in underserved areas. Geospatial foundation models trained on global unstructured data offer a promising alternative by demonstrating strong generalization and requiring only minimal fine-tuning. In this study, an empirical ground truth of urban heat patterns is established by quantifying cooling effects from green spaces and benchmarking them against model predictions to evaluate the model's accuracy. The foundation model is subsequently fine-tuned to predict land surface temperatures under future climate scenarios, and its practical value is demonstrated through a simulated inpainting that highlights its role for mitigation support. The results indicate that foundation models offer a powerful way for evaluating urban heat island mitigation strategies in data-scarce regions to support more climate-resilient cities.
                </div>
            </details>
    </div>
</div>
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.18739v1" target="_blank" rel="noopener noreferrer">
                基于光照衰减感知3D高斯泼溅的可移动光源自适应结肠镜重建
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>1/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            Moving Light Adaptive Colonoscopy Reconstruction via Illumination-Attenuation-Aware 3D Gaussian Splatting
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Hao Wang, Ying Zhou, Haoyu Zhao, Rui Wang, Qiang Hu, Xing Zhang, Qiang Li, Zhiwe...
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于医学影像领域的结肠镜3D重建技术，涉及光照衰减感知和自适应重建方法。虽然技术层面包含3D重建，但完全属于医学/生物医学应用范畴，与推荐系统、搜索、广告等商业应用领域没有任何关联，也不涉及LLM、Transformer或异构数据建模技术。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-10-21 15:44:23
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2510.18739v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2510.18739v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    3D Gaussian Splatting (3DGS) has emerged as a pivotal technique for real-time view synthesis in colonoscopy, enabling critical applications such as virtual colonoscopy and lesion tracking. However, the vanilla 3DGS assumes static illumination and that observed appearance depends solely on viewing angle, which causes incompatibility with the photometric variations in colonoscopic scenes induced by dynamic light source/camera. This mismatch forces most 3DGS methods to introduce structure-violating vaporous Gaussian blobs between the camera and tissues to compensate for illumination attenuation, ultimately degrading the quality of 3D reconstructions. Previous works only consider the illumination attenuation caused by light distance, ignoring the physical characters of light source and camera. In this paper, we propose ColIAGS, an improved 3DGS framework tailored for colonoscopy. To mimic realistic appearance under varying illumination, we introduce an Improved Appearance Modeling with two types of illumination attenuation factors, which enables Gaussians to adapt to photometric variations while preserving geometry accuracy. To ensure the geometry approximation condition of appearance modeling, we propose an Improved Geometry Modeling using high-dimensional view embedding to enhance Gaussian geometry attribute prediction. Furthermore, another cosine embedding input is leveraged to generate illumination attenuation solutions in an implicit manner. Comprehensive experimental results on standard benchmarks demonstrate that our proposed ColIAGS achieves the dual capabilities of novel view synthesis and accurate geometric reconstruction. It notably outperforms other state-of-the-art methods by achieving superior rendering fidelity while significantly reducing Depth MSE. Code will be available.
                </div>
            </details>
    </div>
</div>
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.18714v1" target="_blank" rel="noopener noreferrer">
                PLANA3R：通过前馈平面泼溅实现零样本度量平面三维重建
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>1/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            PLANA3R: Zero-shot Metric Planar 3D Reconstruction via Feed-Forward Planar Splatting
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Changkun Liu, Bin Tan, Zeran Ke, Shangzhan Zhang, Jiachen Liu, Ming Qian, Nan Xu...
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于计算机视觉中的3D重建技术，特别是平面3D重建，这属于纯粹的视觉领域研究。论文标题中提到的平面泼溅和度量重建技术没有显示出与推荐系统、搜索或广告领域的明显关联，也不涉及LLM技术、Transformer架构或异构数据处理方法。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-10-21 15:15:33
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2510.18714v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2510.18714v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    This paper addresses metric 3D reconstruction of indoor scenes by exploiting their inherent geometric regularities with compact representations. Using planar 3D primitives - a well-suited representation for man-made environments - we introduce PLANA3R, a pose-free framework for metric Planar 3D Reconstruction from unposed two-view images. Our approach employs Vision Transformers to extract a set of sparse planar primitives, estimate relative camera poses, and supervise geometry learning via planar splatting, where gradients are propagated through high-resolution rendered depth and normal maps of primitives. Unlike prior feedforward methods that require 3D plane annotations during training, PLANA3R learns planar 3D structures without explicit plane supervision, enabling scalable training on large-scale stereo datasets using only depth and normal annotations. We validate PLANA3R on multiple indoor-scene datasets with metric supervision and demonstrate strong generalization to out-of-domain indoor environments across diverse tasks under metric evaluation protocols, including 3D surface reconstruction, depth estimation, and relative pose estimation. Furthermore, by formulating with planar 3D representation, our method emerges with the ability for accurate plane segmentation. The project page is available at https://lck666666.github.io/plana3r
                </div>
            </details>
    </div>
</div>
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.18701v1" target="_blank" rel="noopener noreferrer">
                UniGenBench++：面向文本到图像生成的统一语义评估基准
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>1/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            UniGenBench++: A Unified Semantic Evaluation Benchmark for Text-to-Image Generation
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Yibin Wang, Zhimin Li, Yuhang Zang, Jiazi Bu, Yujie Zhou, Yi Xin, Junjun He, Chu...
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于文本到图像生成的评估基准，属于纯粹的AIGC和内容生成领域。虽然标题提到'统一语义评估'，但这主要针对生成模型的质量评估，与推荐系统、搜索或广告的核心技术没有直接关联。该工作属于被明确排除的'纯粹LLM中心化主题'和'AIGC、内容生成'类别。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-10-21 14:56:46
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2510.18701v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2510.18701v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Recent progress in text-to-image (T2I) generation underscores the importance of reliable benchmarks in evaluating how accurately generated images reflect the semantics of their textual prompt. However, (1) existing benchmarks lack the diversity of prompt scenarios and multilingual support, both essential for real-world applicability; (2) they offer only coarse evaluations across primary dimensions, covering a narrow range of sub-dimensions, and fall short in fine-grained sub-dimension assessment. To address these limitations, we introduce UniGenBench++, a unified semantic assessment benchmark for T2I generation. Specifically, it comprises 600 prompts organized hierarchically to ensure both coverage and efficiency: (1) spans across diverse real-world scenarios, i.e., 5 main prompt themes and 20 subthemes; (2) comprehensively probes T2I models' semantic consistency over 10 primary and 27 sub evaluation criteria, with each prompt assessing multiple testpoints. To rigorously assess model robustness to variations in language and prompt length, we provide both English and Chinese versions of each prompt in short and long forms. Leveraging the general world knowledge and fine-grained image understanding capabilities of a closed-source Multi-modal Large Language Model (MLLM), i.e., Gemini-2.5-Pro, an effective pipeline is developed for reliable benchmark construction and streamlined model assessment. Moreover, to further facilitate community use, we train a robust evaluation model that enables offline assessment of T2I model outputs. Through comprehensive benchmarking of both open- and closed-sourced T2I models, we systematically reveal their strengths and weaknesses across various aspects.
                </div>
            </details>
    </div>
</div>
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.18671v1" target="_blank" rel="noopener noreferrer">
                超越流水线：分析端到端深度学习在历史作者识别中的关键因素
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>1/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            Beyond the Pipeline: Analyzing Key Factors in End-to-End Deep Learning for Historical Writer Identification
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Hanif Rasyidi, Moshiur Farazi
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于历史作者识别这一特定领域应用，与推荐系统、搜索或广告的核心技术无关。虽然涉及深度学习技术，但主要应用于文档分析和笔迹识别领域，缺乏与RecSys/Search/Ads领域的直接关联或潜在应用价值。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-10-21 14:25:16
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2510.18671v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2510.18671v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    This paper investigates various factors that influence the performance of end-to-end deep learning approaches for historical writer identification (HWI), a task that remains challenging due to the diversity of handwriting styles, document degradation, and the limited number of labelled samples per writer. These conditions often make accurate recognition difficult, even for human experts. Traditional HWI methods typically rely on handcrafted image processing and clustering techniques, which tend to perform well on small and carefully curated datasets. In contrast, end-to-end pipelines aim to automate the process by learning features directly from document images. However, our experiments show that many of these models struggle to generalise in more realistic, document-level settings, especially under zero-shot scenarios where writers in the test set are not present in the training data. We explore different combinations of pre-processing methods, backbone architectures, and post-processing strategies, including text segmentation, patch sampling, and feature aggregation. The results suggest that most configurations perform poorly due to weak capture of low-level visual features, inconsistent patch representations, and high sensitivity to content noise. Still, we identify one end-to-end setup that achieves results comparable to the top-performing system, despite using a simpler design. These findings point to key challenges in building robust end-to-end systems and offer insight into design choices that improve performance in historical document writer identification.
                </div>
            </details>
    </div>
</div>
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.18668v1" target="_blank" rel="noopener noreferrer">
                用于心血管传感器贴片的端到端多模态微型CNN原型设计
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>1/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            Prototyping an End-to-End Multi-Modal Tiny-CNN for Cardiovascular Sensor Patches
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Mustafa Fuad Rifet Ibrahim, Tunc Alkanat, Maurice Meijer, Felix Manthey, Alexand...
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于医疗领域的心血管传感器和微型CNN架构，属于明确的医疗/生物应用范畴。尽管涉及多模态和CNN技术，但其应用场景（心血管传感器贴片）与推荐系统、搜索或广告领域完全无关，且没有显示出任何在这些领域的潜在应用价值。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-10-21 14:23:20
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2510.18668v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2510.18668v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.LG</span><span class="category-tag">cs.CV</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    The vast majority of cardiovascular diseases may be preventable if early signs and risk factors are detected. Cardiovascular monitoring with body-worn sensor devices like sensor patches allows for the detection of such signs while preserving the freedom and comfort of patients. However, the analysis of the sensor data must be robust, reliable, efficient, and highly accurate. Deep learning methods can automate data interpretation, reducing the workload of clinicians. In this work, we analyze the feasibility of applying deep learning models to the classification of synchronized electrocardiogram (ECG) and phonocardiogram (PCG) recordings on resource-constrained medical edge devices. We propose a convolutional neural network with early fusion of data to solve a binary classification problem. We train and validate our model on the synchronized ECG and PCG recordings from the Physionet Challenge 2016 dataset. Our approach reduces memory footprint and compute cost by three orders of magnitude compared to the state-of-the-art while maintaining competitive accuracy. We demonstrate the applicability of our proposed model on medical edge devices by analyzing energy consumption on a microcontroller and an experimental sensor device setup, confirming that on-device inference can be more energy-efficient than continuous data streaming.
                </div>
            </details>
    </div>
</div>
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.18660v1" target="_blank" rel="noopener noreferrer">
                基于可逆网络的交互式卫星图像变化检测中的图像增强
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>1/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            Image augmentation with invertible networks in interactive satellite image change detection
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Hichem Sahbi
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于计算机视觉领域的卫星图像处理，涉及图像增强和变化检测技术。这些内容与推荐系统、搜索或广告的核心领域进展、LLM技术或Transformer架构没有直接关联，也不涉及异构数据的统一建模。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-10-21 14:11:22
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2510.18660v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2510.18660v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    This paper devises a novel interactive satellite image change detection algorithm based on active learning. Our framework employs an iterative process that leverages a question-and-answer model. This model queries the oracle (user) about the labels of a small subset of images (dubbed as display), and based on the oracle's responses, change detection model is dynamically updated. The main contribution of our framework resides in a novel invertible network that allows augmenting displays, by mapping them from highly nonlinear input spaces to latent ones, where augmentation transformations become linear and more tractable. The resulting augmented data are afterwards mapped back to the input space, and used to retrain more effective change detection criteria in the subsequent iterations of active learning. Experimental results demonstrate superior performance of our proposed method compared to the related work.
                </div>
            </details>
    </div>
</div>
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.18637v1" target="_blank" rel="noopener noreferrer">
                ε-Seg：显微数据的稀疏监督语义分割
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>1/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            ε-Seg: Sparsely Supervised Semantic Segmentation of Microscopy Data
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Sheida Rahnamai Kordasiabi, Damian Dalle Nogare, Florian Jug
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于显微数据的语义分割，属于计算机视觉在生物医学领域的特定应用。虽然语义分割技术本身是计算机视觉的基础任务，但论文明确限定在显微数据这一与推荐系统、搜索或广告无关的领域，且没有表明任何向异构数据统一建模或推荐系统应用的潜力。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-10-21 13:41:07
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2510.18637v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2510.18637v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CV</span><span class="category-tag">cs.AI</span><span class="category-tag">cs.LG</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Semantic segmentation of electron microscopy (EM) images of biological samples remains a challenge in the life sciences. EM data captures details of biological structures, sometimes with such complexity that even human observers can find it overwhelming. We introduce ε-Seg, a method based on hierarchical variational autoencoders (HVAEs), employing center-region masking, sparse label contrastive learning (CL), a Gaussian mixture model (GMM) prior, and clustering-free label prediction. Center-region masking and the inpainting loss encourage the model to learn robust and representative embeddings to distinguish the desired classes, even if training labels are sparse (0.05% of the total image data or less). For optimal performance, we employ CL and a GMM prior to shape the latent space of the HVAE such that encoded input patches tend to cluster wrt. the semantic classes we wish to distinguish. Finally, instead of clustering latent embeddings for semantic segmentation, we propose an MLP semantic segmentation head to directly predict class labels from latent embeddings. We show empirical results of ε-Seg and baseline methods on 2 dense EM datasets of biological tissues and demonstrate the applicability of our method also on fluorescence microscopy data. Our results show that ε-Seg is capable of achieving competitive sparsely-supervised segmentation results on complex biological image data, even if only limited amounts of training labels are available.
                </div>
            </details>
    </div>
</div>
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.18573v1" target="_blank" rel="noopener noreferrer">
                Kaleido：开源多主体参考视频生成模型
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>1/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            Kaleido: Open-Sourced Multi-Subject Reference Video Generation Model
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Zhenxing Zhang, Jiayan Teng, Zhuoyi Yang, Tiankun Cao, Cheng Wang, Xiaotao Gu, J...
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于视频生成领域，属于纯粹的视觉内容生成技术，与推荐系统、搜索或广告的核心技术无关。虽然标题中提到'多主体参考'可能暗示某种多模态处理，但整体属于AIGC/内容生成范畴，明确属于应排除的无关主题。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-10-21 12:28:14
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2510.18573v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2510.18573v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CV</span><span class="category-tag">cs.AI</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    We present Kaleido, a subject-to-video (S2V) generation framework, which aims to synthesize subject-consistent videos conditioned on multiple reference images of target subjects. Despite recent progress in S2V generation models, existing approaches remain inadequate at maintaining multi-subject consistency and at handling background disentanglement, often resulting in lower reference fidelity and semantic drift under multi-image conditioning. These shortcomings can be attributed to several factors. Primarily, the training dataset suffers from a lack of diversity and high-quality samples, as well as cross-paired data, i.e., paired samples whose components originate from different instances. In addition, the current mechanism for integrating multiple reference images is suboptimal, potentially resulting in the confusion of multiple subjects. To overcome these limitations, we propose a dedicated data construction pipeline, incorporating low-quality sample filtering and diverse data synthesis, to produce consistency-preserving training data. Moreover, we introduce Reference Rotary Positional Encoding (R-RoPE) to process reference images, enabling stable and precise multi-image integration. Extensive experiments across numerous benchmarks demonstrate that Kaleido significantly outperforms previous methods in consistency, fidelity, and generalization, marking an advance in S2V generation.
                </div>
            </details>
    </div>
</div>
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.18552v1" target="_blank" rel="noopener noreferrer">
                Descriptor：遮挡nuScenes：用于评估自动驾驶中感知鲁棒性的多传感器数据集
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>1/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            Descriptor: Occluded nuScenes: A Multi-Sensor Dataset for Evaluating Perception Robustness in Automated Driving
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Sanjay Kumar, Tim Brophy, Reenu Mohandas, Eoin Martino Grua, Ganesh Sistu, Valen...
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于自动驾驶领域的多传感器数据集和感知鲁棒性评估，与推荐系统、搜索或广告的核心技术领域完全无关。论文内容涉及计算机视觉和自动驾驶感知，属于纯粹的视觉应用，没有明确的推荐/搜索/广告应用潜力。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-10-21 12:02:26
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2510.18552v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2510.18552v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Robust perception in automated driving requires reliable performance under adverse conditions, where sensors may be affected by partial failures or environmental occlusions. Although existing autonomous driving datasets inherently contain sensor noise and environmental variability, very few enable controlled, parameterised, and reproducible degradations across multiple sensing modalities. This gap limits the ability to systematically evaluate how perception and fusion architectures perform under well-defined adverse conditions. To address this limitation, we introduce the Occluded nuScenes Dataset, a novel extension of the widely used nuScenes benchmark. For the camera modality, we release both the full and mini versions with four types of occlusions, two adapted from public implementations and two newly designed. For radar and LiDAR, we provide parameterised occlusion scripts that implement three types of degradations each, enabling flexible and repeatable generation of occluded data. This resource supports consistent, reproducible evaluation of perception models under partial sensor failures and environmental interference. By releasing the first multi-sensor occlusion dataset with controlled and reproducible degradations, we aim to advance research on robust sensor fusion, resilience analysis, and safety-critical perception in automated driving.
                </div>
            </details>
    </div>
</div>
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.18539v1" target="_blank" rel="noopener noreferrer">
                GBlobs：用于改进传感器放置泛化能力的局部LiDAR几何特征
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>1/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            GBlobs: Local LiDAR Geometry for Improved Sensor Placement Generalization
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Dušan Malić, Christian Fruhwirth-Reisinger, Alexander Prutsch, Wei Lin, Samuel S...
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于LiDAR传感器放置和几何特征，属于纯粹的3D视觉和传感器技术领域。虽然标题提到'泛化'，但这指的是传感器配置的泛化能力，与推荐系统、搜索或广告没有任何直接或间接的关联。该技术没有明显的应用潜力到RecSys/Search/Ads领域。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-10-21 11:35:58
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2510.18539v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2510.18539v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    This technical report outlines the top-ranking solution for RoboSense 2025: Track 3, achieving state-of-the-art performance on 3D object detection under various sensor placements. Our submission utilizes GBlobs, a local point cloud feature descriptor specifically designed to enhance model generalization across diverse LiDAR configurations. Current LiDAR-based 3D detectors often suffer from a “geometric shortcut” when trained on conventional global features (i.e., absolute Cartesian coordinates). This introduces a position bias that causes models to primarily rely on absolute object position rather than distinguishing shape and appearance characteristics. Although effective for in-domain data, this shortcut severely limits generalization when encountering different point distributions, such as those resulting from varying sensor placements. By using GBlobs as network input features, we effectively circumvent this geometric shortcut, compelling the network to learn robust, object-centric representations. This approach significantly enhances the model's ability to generalize, resulting in the exceptional performance demonstrated in this challenge.
                </div>
            </details>
    </div>
</div>
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.18521v1" target="_blank" rel="noopener noreferrer">
                RayPose：用于未见过的6D物体姿态估计中模板视图的射线束扩散方法
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>1/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            RayPose: Ray Bundling Diffusion for Template Views in Unseen 6D Object Pose Estimation
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Junwen Huang, Shishir Reddy Vutukur, Peter KT Yu, Nassir Navab, Slobodan Ilic, B...
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于计算机视觉中的6D物体姿态估计，这是一个纯粹的视觉定位问题，与推荐系统、搜索或广告没有直接关联。论文涉及射线束扩散和模板视图等技术，属于3D视觉和机器人感知领域，不在当前关注的任何技术范畴内。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-10-21 11:01:20
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2510.18521v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2510.18521v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Typical template-based object pose pipelines estimate the pose by retrieving the closest matching template and aligning it with the observed image. However, failure to retrieve the correct template often leads to inaccurate pose predictions. To address this, we reformulate template-based object pose estimation as a ray alignment problem, where the viewing directions from multiple posed template images are learned to align with a non-posed query image. Inspired by recent progress in diffusion-based camera pose estimation, we embed this formulation into a diffusion transformer architecture that aligns a query image with a set of posed templates. We reparameterize object rotation using object-centered camera rays and model object translation by extending scale-invariant translation estimation to dense translation offsets. Our model leverages geometric priors from the templates to guide accurate query pose inference. A coarse-to-fine training strategy based on narrowed template sampling improves performance without modifying the network architecture. Extensive experiments across multiple benchmark datasets show competitive results of our method compared to state-of-the-art approaches in unseen object pose estimation.
                </div>
            </details>
    </div>
</div>
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.18513v1" target="_blank" rel="noopener noreferrer">
                DWaste：使用移动和边缘设备进行垃圾分类的绿色人工智能
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>1/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            DWaste: Greener AI for Waste Sorting using Mobile and Edge Devices
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Suman Kunwar
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于环保AI和垃圾分类应用，属于特定领域应用（环境科学），与推荐系统、搜索或广告的核心技术进展完全无关。论文标题中提到的移动和边缘设备虽然涉及技术实现，但应用场景与我的关注领域没有任何直接或间接关联。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-10-21 10:55:32
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2510.18513v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2510.18513v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    The rise of convenience packaging has led to the generation of enormous waste, making efficient waste sorting crucial for sustainable waste management. To address this, we developed DWaste, a computer vision-powered platform designed for real-time waste sorting on resource-constrained smartphones and edge devices, including offline functionality. We benchmarked various image classification models (EfficientNetV2S/M, ResNet50/101, MobileNet) and object detection models (YOLOv8n, YOLOv11n) using a subset of our own waste dataset, which we annotated using the custom tool Annotated Lab. We found a clear trade-off between accuracy and resource consumption: the best classifier, EfficientNetV2S, achieved high accuracy (~ 96%) but suffered from high latency (~ 0.22s) and elevated carbon emissions. In contrast, lightweight object detection models delivered strong performance (up to 77% mAP) with ultra-fast inference (~ 0.03s) and significantly smaller model sizes (< 7MB), making them ideal for real-time, low-power use. Model quantization further maximized efficiency, substantially reducing model size and VRAM usage by up to 75%. Our work demonstrates the successful implementation of "Greener AI" models to support real-time, sustainable waste sorting on edge devices.
                </div>
            </details>
    </div>
</div>
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.18446v1" target="_blank" rel="noopener noreferrer">
                LAND：基于解剖学引导的3D胸部CT合成肺与结节扩散模型
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>1/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            LAND: Lung and Nodule Diffusion for 3D Chest CT Synthesis with Anatomical Guidance
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Anna Oliveras, Roger Marí, Rafael Redondo, Oriol Guardià, Ana Tost, Bhalaji Naga...
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于医学影像领域的3D胸部CT合成，属于医学/生物学特定领域应用，与推荐系统、搜索或广告完全无关。论文内容涉及医学图像生成和扩散模型在医疗领域的应用，这明确属于被排除的无关主题范畴。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-10-21 09:20:22
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2510.18446v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2510.18446v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    This work introduces a new latent diffusion model to generate high-quality 3D chest CT scans conditioned on 3D anatomical masks. The method synthesizes volumetric images of size 256x256x256 at 1 mm isotropic resolution using a single mid-range GPU, significantly lowering the computational cost compared to existing approaches. The conditioning masks delineate lung and nodule regions, enabling precise control over the output anatomical features. Experimental results demonstrate that conditioning solely on nodule masks leads to anatomically incorrect outputs, highlighting the importance of incorporating global lung structure for accurate conditional synthesis. The proposed approach supports the generation of diverse CT volumes with and without lung nodules of varying attributes, providing a valuable tool for training AI models or healthcare professionals.
                </div>
            </details>
    </div>
</div>
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.18405v1" target="_blank" rel="noopener noreferrer">
                基于OCR引导的YOLOv8和轨迹建模的板球视频自动投球分割与弱点检测
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>1/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            Automated Wicket-Taking Delivery Segmentation and Weakness Detection in Cricket Videos Using OCR-Guided YOLOv8 and Trajectory Modeling
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Mst Jannatun Ferdous, Masum Billah, Joy Karmoker, Mohd Ruhul Ameen, Akif Islam, ...
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于体育视频分析中的板球投球分割和弱点检测，使用计算机视觉技术处理特定领域视频数据。这与推荐系统、搜索或广告的核心领域进展、LLM技术或Transformer架构没有任何关联，也不涉及异构数据的统一建模。该研究纯粹是计算机视觉在体育分析中的应用，属于明确排除的无关主题。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-10-21 08:27:23
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2510.18405v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2510.18405v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CV</span><span class="category-tag">cs.AI</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    This paper presents an automated system for cricket video analysis that leverages deep learning techniques to extract wicket-taking deliveries, detect cricket balls, and model ball trajectories. The system employs the YOLOv8 architecture for pitch and ball detection, combined with optical character recognition (OCR) for scorecard extraction to identify wicket-taking moments. Through comprehensive image preprocessing, including grayscale transformation, power transformation, and morphological operations, the system achieves robust text extraction from video frames. The pitch detection model achieved 99.5% mean Average Precision at 50% IoU (mAP50) with a precision of 0.999, while the ball detection model using transfer learning attained 99.18% mAP50 with 0.968 precision and 0.978 recall. The system enables trajectory modeling on detected pitches, providing data-driven insights for identifying batting weaknesses. Experimental results on multiple cricket match videos demonstrate the effectiveness of this approach for automated cricket analytics, offering significant potential for coaching and strategic decision-making.
                </div>
            </details>
    </div>
</div>
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.18400v1" target="_blank" rel="noopener noreferrer">
                用于高光谱-多光谱图像融合的贝叶斯全连接张量网络
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>1/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            Bayesian Fully-Connected Tensor Network for Hyperspectral-Multispectral Image Fusion
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Linsong Shan, Zecan Yang, Laurence T. Yang, Changlong Li, Honglu Zhao, Xin Nie
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于高光谱和多光谱图像融合的计算机视觉任务，属于纯粹的视觉处理领域。虽然涉及张量网络和贝叶斯方法等技术，但这些技术在论文中被应用于特定的图像处理问题，与推荐系统、搜索或广告领域没有明显的关联或潜在应用。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-10-21 08:19:54
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2510.18400v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2510.18400v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Tensor decomposition is a powerful tool for data analysis and has been extensively employed in the field of hyperspectral-multispectral image fusion (HMF). Existing tensor decomposition-based fusion methods typically rely on disruptive data vectorization/reshaping or impose rigid constraints on the arrangement of factor tensors, hindering the preservation of spatial-spectral structures and the modeling of cross-dimensional correlations. Although recent advances utilizing the Fully-Connected Tensor Network (FCTN) decomposition have partially alleviated these limitations, the process of reorganizing data into higher-order tensors still disrupts the intrinsic spatial-spectral structure. Furthermore, these methods necessitate extensive manual parameter tuning and exhibit limited robustness against noise and spatial degradation. To alleviate these issues, we propose the Bayesian FCTN (BFCTN) method. Within this probabilistic framework, a hierarchical sparse prior, which characterizes the sparsity of physical elements, establishes connections between the factor tensors. This framework explicitly models the intrinsic physical coupling among spatial structures, spectral signatures, and local scene homogeneity. For model learning, we develop a parameter estimation method based on Variational Bayesian inference (VB) and the Expectation-Maximization (EM) algorithm, which significantly reduces the need for manual parameter tuning. Extensive experiments demonstrate that BFCTN not only achieves state-of-the-art fusion accuracy and strong robustness but also exhibits practical applicability in complex real-world scenarios.
                </div>
            </details>
    </div>
</div>
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.18396v1" target="_blank" rel="noopener noreferrer">
                基于里奇流的熵增强共形特征用于鲁棒的阿尔茨海默病分类
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>1/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            Entropy-Enhanced Conformal Features from Ricci Flow for Robust Alzheimer's Disease Classification
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>F. Ahmadi, B. Bidabad, H. Nasiri
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于医学领域的阿尔茨海默病分类，属于明确的无关主题（医学/生物学应用）。论文标题中提到的里奇流和熵增强特征都是数学/医学影像处理方法，与推荐系统、搜索或广告领域没有任何技术关联或潜在应用。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-10-21 08:16:45
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2510.18396v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2510.18396v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Background and Objective: In brain imaging, geometric surface models are essential for analyzing the 3D shapes of anatomical structures. Alzheimer's disease (AD) is associated with significant cortical atrophy, making such shape analysis a valuable diagnostic tool. The objective of this study is to introduce and validate a novel local surface representation method for the automated and accurate diagnosis of AD. Methods: The study utilizes T1-weighted MRI scans from 160 participants (80 AD patients and 80 healthy controls) from the Alzheimer's Disease Neuroimaging Initiative (ADNI). Cortical surface models were reconstructed from the MRI data using Freesurfer. Key geometric attributes were computed from the 3D meshes. Area distortion and conformal factor were derived using Ricci flow for conformal parameterization, while Gaussian curvature was calculated directly from the mesh geometry. Shannon entropy was applied to these three features to create compact and informative feature vectors. The feature vectors were used to train and evaluate a suite of classifiers (e.g. XGBoost, MLP, Logistic Regression, etc.). Results: Statistical significance of performance differences between classifiers was evaluated using paired Welch's t-test. The method proved highly effective in distinguishing AD patients from healthy controls. The Multi-Layer Perceptron (MLP) and Logistic Regression classifiers outperformed all others, achieving an accuracy and F1 Score of 98.62%. Conclusions: This study confirms that the entropy of conformally-derived geometric features provides a powerful and robust metric for cortical morphometry. The high classification accuracy underscores the method's potential to enhance the study and diagnosis of Alzheimer's disease, offering a straightforward yet powerful tool for clinical research applications.
                </div>
            </details>
    </div>
</div>
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.18362v1" target="_blank" rel="noopener noreferrer">
                FeatureFool：通过特征映射实现视频模型的零查询欺骗
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>1/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            FeatureFool: Zero-Query Fooling of Video Models via Feature Map
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Duoxun Tang, Xi Xiao, Guangwu Hu, Kangkang Sun, Xiao Yang, Dongyang Chen, Qing L...
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于视频模型的对抗性攻击和安全性，这属于安全领域，被明确列为不相关主题。虽然涉及模型特征，但核心关注点是欺骗和攻击，与推荐系统、搜索或广告中的核心进展、LLM技术或Transformer架构改进没有任何关联。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-10-21 07:33:35
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2510.18362v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2510.18362v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    The vulnerability of deep neural networks (DNNs) has been preliminarily verified. Existing black-box adversarial attacks usually require multi-round interaction with the model and consume numerous queries, which is impractical in the real-world and hard to scale to recently emerged Video-LLMs. Moreover, no attack in the video domain directly leverages feature maps to shift the clean-video feature space. We therefore propose FeatureFool, a stealthy, video-domain, zero-query black-box attack that utilizes information extracted from a DNN to alter the feature space of clean videos. Unlike query-based methods that rely on iterative interaction, FeatureFool performs a zero-query attack by directly exploiting DNN-extracted information. This efficient approach is unprecedented in the video domain. Experiments show that FeatureFool achieves an attack success rate above 70% against traditional video classifiers without any queries. Benefiting from the transferability of the feature map, it can also craft harmful content and bypass Video-LLM recognition. Additionally, adversarial videos generated by FeatureFool exhibit high quality in terms of SSIM, PSNR, and Temporal-Inconsistency, making the attack barely perceptible. This paper may contain violent or explicit content.
                </div>
            </details>
    </div>
</div>
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.18341v1" target="_blank" rel="noopener noreferrer">
                ViSE：一种纯视觉街景外推的系统性方法
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>1/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            ViSE: A Systematic Approach to Vision-Only Street-View Extrapolation
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Kaiyuan Tan, Yingying Shen, Haiyang Sun, Bing Wang, Guang Chen, Hangjun Ye
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于纯视觉街景外推，属于计算机视觉领域，与推荐系统、搜索或广告没有明显关联。标题表明这是纯粹的视觉处理任务，没有涉及用户行为建模、内容理解或排名优化等RecSys/Search/Ads相关技术。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-10-21 06:50:20
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2510.18341v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2510.18341v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Realistic view extrapolation is critical for closed-loop simulation in autonomous driving, yet it remains a significant challenge for current Novel View Synthesis (NVS) methods, which often produce distorted and inconsistent images beyond the original trajectory. This report presents our winning solution, which took first place in the RealADSim Workshop NVS track at ICCV 2025. To address the core challenges of street view extrapolation, we introduce a comprehensive four-stage pipeline. First, we employ a data-driven initialization strategy to generate a robust pseudo-LiDAR point cloud, avoiding local minima. Second, we inject strong geometric priors by modeling the road surface with a novel dimension-reduced SDF termed 2D-SDF. Third, we leverage a generative prior to create pseudo ground truth for extrapolated viewpoints, providing auxiliary supervision. Finally, a data-driven adaptation network removes time-specific artifacts. On the RealADSim-NVS benchmark, our method achieves a final score of 0.441, ranking first among all participants.
                </div>
            </details>
    </div>
</div>
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.18326v1" target="_blank" rel="noopener noreferrer">
                使用ATTBHFA-Net增强基准和灾害图像的少样本分类
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>1/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            Enhancing Few-Shot Classification of Benchmark and Disaster Imagery with ATTBHFA-Net
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Gao Yu Lee, Tanmoy Dam, Md Meftahul Ferdaus, Daniel Puiu Poenar, Vu Duong
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于计算机视觉领域的图像分类任务，特别是基准和灾害图像的少样本学习。虽然提到了注意力机制（ATT），但核心应用场景是视觉图像分析，与推荐系统、搜索或广告没有直接关联，也不涉及LLM技术或Transformer架构在序列建模中的应用。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-10-21 06:24:42
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2510.18326v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2510.18326v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    The increasing frequency of natural and human-induced disasters necessitates advanced visual recognition techniques capable of analyzing critical photographic data. With progress in artificial intelligence and resilient computational systems, rapid and accurate disaster classification has become crucial for efficient rescue operations. However, visual recognition in disaster contexts faces significant challenges due to limited yet highly diverse data, a consequence of the difficulties in collecting and curating comprehensive, high-quality disaster imagery. Few-Shot Learning (FSL) provides a promising approach to data scarcity, yet current FSL research mainly relies on generic benchmark datasets lacking remote-sensing disaster imagery, limiting its practical effectiveness. Moreover, disaster images exhibit high intra-class variation and inter-class similarity, hindering the performance of conventional metric-based FSL methods. To address these issues, this paper introduces the Attention-based Bhattacharyya-Hellinger Feature Aggregation Network (ATTBHFA-Net), which linearly combines the Bhattacharyya coefficient and Hellinger distances to compare and aggregate feature probability distributions for robust prototype formation. The Bhattacharyya coefficient serves as a contrastive margin that enhances inter-class separability, while the Hellinger distance regularizes same-class alignment. This framework parallels contrastive learning but operates over probability distributions rather than embedded feature points. Furthermore, a Bhattacharyya-Hellinger distance-based contrastive loss is proposed as a distributional counterpart to the cosine similarity loss, used jointly with categorical cross-entropy to significantly improve FSL performance. Experiments on four FSL benchmarks and two disaster image datasets demonstrate the superior effectiveness and generalization of ATTBHFA-Net compared to existing approaches.
                </div>
            </details>
    </div>
</div>
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.18313v1" target="_blank" rel="noopener noreferrer">
                OmniNWM：全知驾驶导航世界模型
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>1/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            OmniNWM: Omniscient Driving Navigation World Models
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Bohan Li, Zhuang Ma, Dalong Du, Baorui Peng, Zhujin Liang, Zhenqiang Liu, Chao M...
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于自动驾驶导航领域的世界模型构建，属于纯粹的机器人学和自动驾驶应用。标题中的'驾驶导航'和'世界模型'明确指向物理世界的感知与控制任务，与推荐系统、搜索或广告的核心技术栈完全无关。该研究没有显示出任何在异构数据建模、Transformer架构或LLM技术方面的潜在应用价值。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-10-21 05:49:01
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2510.18313v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2510.18313v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Autonomous driving world models are expected to work effectively across three core dimensions: state, action, and reward. Existing models, however, are typically restricted to limited state modalities, short video sequences, imprecise action control, and a lack of reward awareness. In this paper, we introduce OmniNWM, an omniscient panoramic navigation world model that addresses all three dimensions within a unified framework. For state, OmniNWM jointly generates panoramic videos of RGB, semantics, metric depth, and 3D occupancy. A flexible forcing strategy enables high-quality long-horizon auto-regressive generation. For action, we introduce a normalized panoramic Plucker ray-map representation that encodes input trajectories into pixel-level signals, enabling highly precise and generalizable control over panoramic video generation. Regarding reward, we move beyond learning reward functions with external image-based models: instead, we leverage the generated 3D occupancy to directly define rule-based dense rewards for driving compliance and safety. Extensive experiments demonstrate that OmniNWM achieves state-of-the-art performance in video generation, control accuracy, and long-horizon stability, while providing a reliable closed-loop evaluation framework through occupancy-grounded rewards. Project page is available at https://github.com/Arlo0o/OmniNWM.
                </div>
            </details>
    </div>
</div>
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.18287v1" target="_blank" rel="noopener noreferrer">
                面向3D感知深度生成模型的高效少样本身份保持属性编辑
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>1/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            Efficient Few-shot Identity Preserving Attribute Editing for 3D-aware Deep Generative Models
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Vishal Vinod
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于3D感知生成模型和属性编辑，属于计算机视觉和图形学领域。虽然涉及生成模型，但主要关注3D内容生成和编辑，与推荐系统、搜索或广告的核心技术没有直接关联，也不涉及Transformer架构或LLM技术在推荐/搜索/广告领域的潜在应用。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-10-21 04:27:46
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2510.18287v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2510.18287v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CV</span><span class="category-tag">cs.LG</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Identity preserving editing of faces is a generative task that enables modifying the illumination, adding/removing eyeglasses, face aging, editing hairstyles, modifying expression etc., while preserving the identity of the face. Recent progress in 2D generative models has enabled photorealistic editing of faces using simple techniques leveraging the compositionality in GANs. However, identity preserving editing for 3D faces with a given set of attributes is a challenging task as the generative model must reason about view consistency from multiple poses and render a realistic 3D face. Further, 3D portrait editing requires large-scale attribute labelled datasets and presents a trade-off between editability at low resolution and inflexibility to editing at high resolution. In this work, we aim to alleviate some of the constraints in editing 3D faces by identifying latent space directions that correspond to photorealistic edits. To address this, we present a method that builds on recent advancements in 3D-aware deep generative models and 2D portrait editing techniques to perform efficient few-shot identity preserving attribute editing for 3D-aware generative models. We aim to show from experimental results that using just ten or fewer labelled images of an attribute is sufficient to estimate edit directions in the latent space that correspond to 3D-aware attribute editing. In this work, we leverage an existing face dataset with masks to obtain the synthetic images for the few attribute examples required for estimating the edit directions. Further, to demonstrate the linearity of edits, we investigate one-shot stylization by performing sequential editing and use the (2D) Attribute Style Manipulation (ASM) technique to investigate a continuous style manifold for 3D consistent identity preserving face aging. Code and results are available at: https://vishal-vinod.github.io/gmpi-edit/
                </div>
            </details>
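            <div class="mt-3 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                <p class="mb-2"><strong>编者示意（非论文实现）：</strong>摘要的核心思路是用十张以内带属性标签的样本估计潜空间中的编辑方向。下面给出"均值差"式方向估计的最小 JavaScript 草图；函数名与数据结构均为编者假设。</p>
                <pre class="overflow-x-auto text-xs"><code>// 对若干潜向量取均值
function meanVector(vectors) {
  const dim = vectors[0].length;
  const m = new Array(dim).fill(0);
  vectors.forEach(v => v.forEach((x, i) => { m[i] += x / vectors.length; }));
  return m;
}

// 有属性样本的均值减去无属性样本的均值，即为近似的属性编辑方向
function editDirection(latentsWithAttr, latentsWithoutAttr) {
  const a = meanVector(latentsWithAttr);
  const b = meanVector(latentsWithoutAttr);
  return a.map((x, i) => x - b[i]); // 将该方向按强度加到潜码上即可完成编辑
}</code></pre>
            </div>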
    </div>
</div>
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.18268v1" target="_blank" rel="noopener noreferrer">
                TreeFedDG：缓解医学图像分割中联邦领域泛化的全局漂移问题
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>1/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            TreeFedDG: Alleviating Global Drift in Federated Domain Generalization for Medical Image Segmentation
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Yucheng Song, Chenxi Li, Haokang Ding, Zhining Liao, Zhifang Liao
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文明确涉及联邦学习（Federated Learning）和医学图像分割，这两个主题均属于明确的无关主题范畴。联邦学习在无关主题中被明确排除，而医学图像分割属于医学领域特定应用，同样不符合当前关注重点。该技术没有显示出在推荐系统、搜索或广告领域的潜在应用价值。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-10-21 03:38:05
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2510.18268v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2510.18268v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    In medical image segmentation tasks, Domain Generalization (DG) under the Federated Learning (FL) framework is crucial for addressing challenges related to privacy protection and data heterogeneity. However, traditional federated learning methods fail to account for the imbalance in information aggregation across clients in cross-domain scenarios, leading to the Global Drift (GD) problem and a consequent decline in model generalization performance. This motivates us to delve deeper and define a new critical issue: global drift in federated domain generalization for medical imaging (FedDG-GD). In this paper, we propose a novel tree topology framework called TreeFedDG. First, starting from the distributed characteristics of medical images, we design a hierarchical parameter aggregation method based on a tree-structured topology to suppress deviations in the global model direction. Second, we introduce a parameter difference-based style mixing method (FedStyle), which enforces mixing among clients with maximum parameter differences to enhance robustness against drift. Third, we develop a progressive personalized fusion strategy during model distribution, ensuring a balance between knowledge transfer and personalized features. Finally, during the inference phase, we use feature similarity to guide the retrieval of the most relevant model chain from the tree structure for ensemble decision-making, thereby fully leveraging the advantages of hierarchical knowledge. We conducted extensive experiments on two publicly available datasets. The results demonstrate that our method outperforms other state-of-the-art domain generalization approaches in these challenging tasks and achieves better balance in cross-domain performance.
                </div>
            </details>
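            <div class="mt-3 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                <p class="mb-2"><strong>编者示意（非论文实现）：</strong>摘要提出沿树状拓扑做分层参数聚合以抑制全局漂移。下面是"自底向上逐层加权平均"的一个极简 JavaScript 草图；节点结构、按样本量加权等细节均为编者假设。</p>
                <pre class="overflow-x-auto text-xs"><code>// node: { params: number[], sampleCount: number, children: node[] }
function aggregateTree(node) {
  if (!node.children || node.children.length === 0) {
    return { params: node.params, weight: node.sampleCount };
  }
  const aggregated = node.children.map(aggregateTree);
  const total = aggregated.reduce((s, c) => s + c.weight, 0);
  const params = new Array(aggregated[0].params.length).fill(0);
  aggregated.forEach(child => {
    child.params.forEach((p, i) => {
      params[i] += (child.weight / total) * p; // 子节点按权重加权平均
    });
  });
  return { params, weight: total }; // 逐层向根节点汇聚
}</code></pre>
            </div>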
    </div>
</div>
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.18267v1" target="_blank" rel="noopener noreferrer">
                用于人体网格恢复与并行优化的隐式信息与低维学习
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>1/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            Latent-Info and Low-Dimensional Learning for Human Mesh Recovery and Parallel Optimization
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Xiang Zhang, Suping Wu, Sheng Yang
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于计算机视觉领域的人体网格恢复任务，属于纯粹的视觉技术范畴。虽然提到了并行优化，但这与推荐系统、搜索或广告的核心技术需求没有直接关联，也没有展示出在异构数据处理或Transformer架构方面的潜在应用价值。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-10-21 03:35:12
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2510.18267v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2510.18267v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CV</span><span class="category-tag">cs.AI</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Existing 3D human mesh recovery methods often fail to fully exploit the latent information (e.g., human motion, shape alignment), leading to issues with limb misalignment and insufficient local details in the reconstructed human mesh (especially in complex scenes). Furthermore, the performance improvement gained by modelling mesh vertices and pose node interactions using attention mechanisms comes at a high computational cost. To address these issues, we propose a two-stage network for human mesh recovery based on latent information and low dimensional learning. Specifically, the first stage of the network fully excavates global (e.g., the overall shape alignment) and local (e.g., textures, detail) information from the low and high-frequency components of image features and aggregates this information into a hybrid latent frequency domain feature. This strategy effectively extracts latent information. Subsequently, utilizing extracted hybrid latent frequency domain features collaborates to enhance 2D poses to 3D learning. In the second stage, with the assistance of hybrid latent features, we model the interaction learning between the rough 3D human mesh template and the 3D pose, optimizing the pose and shape of the human mesh. Unlike existing mesh pose interaction methods, we design a low-dimensional mesh pose interaction method through dimensionality reduction and parallel optimization that significantly reduces computational costs without sacrificing reconstruction accuracy. Extensive experimental results on large publicly available datasets indicate superiority compared to the most state-of-the-art.
                </div>
            </details>
    </div>
</div>
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.18263v1" target="_blank" rel="noopener noreferrer">
                从竞争到协同：解锁强化学习在主体驱动图像生成中的应用
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>1/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            From Competition to Synergy: Unlocking Reinforcement Learning for Subject-Driven Image Generation
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Ziwei Huang, Ying Shu, Hao Fang, Quanyu Long, Wenya Wang, Qiushi Guo, Tiezheng G...
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于图像生成领域，属于AIGC和内容生成的范畴，这在无关主题中被明确排除。虽然涉及强化学习，但应用于图像生成而非推荐系统、搜索或广告的排序任务，与当前关注点无直接关联。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-10-21 03:32:26
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2510.18263v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2510.18263v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.LG</span><span class="category-tag">cs.CV</span><span class="category-tag">cs.GR</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Subject-driven image generation models face a fundamental trade-off between identity preservation (fidelity) and prompt adherence (editability). While online reinforcement learning (RL), specifically GRPO, offers a promising solution, we find that a naive application of GRPO leads to competitive degradation, as the simple linear aggregation of rewards with static weights causes conflicting gradient signals and a misalignment with the temporal dynamics of the diffusion process. To overcome these limitations, we propose Customized-GRPO, a novel framework featuring two key innovations: (i) Synergy-Aware Reward Shaping (SARS), a non-linear mechanism that explicitly penalizes conflicted reward signals and amplifies synergistic ones, providing a sharper and more decisive gradient. (ii) Time-Aware Dynamic Weighting (TDW), which aligns the optimization pressure with the model's temporal dynamics by prioritizing prompt-following in the early steps and identity preservation in the later ones. Extensive experiments demonstrate that our method significantly outperforms naive GRPO baselines, successfully mitigating competitive degradation. Our model achieves a superior balance, generating images that both preserve key identity features and accurately adhere to complex textual prompts.
                </div>
            </details>
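            <div class="mt-3 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                <p class="mb-2"><strong>编者示意（非论文实现）：</strong>摘要提出对"身份保持"与"提示遵循"两路奖励做协同感知整形（SARS），并按扩散时间步动态加权（TDW）。下面用 JavaScript 粗略示意这两个想法；系数与函数形式均为编者假设，并非论文公式。</p>
                <pre class="overflow-x-auto text-xs"><code>// 协同感知整形：两路奖励一致且都较高时放大，分歧（冲突）大时惩罚
function synergyAwareReward(rIdentity, rPrompt) {
  const base = rIdentity + rPrompt;
  const synergyBonus = Math.min(rIdentity, rPrompt); // 共同"短板"越高越好
  const conflictGap = Math.abs(rIdentity - rPrompt); // 冲突程度
  return base + 0.5 * synergyBonus - 0.5 * conflictGap;
}

// 时间感知动态加权：扩散早期偏向提示遵循，后期偏向身份保持
function timeAwareWeight(t, totalSteps) {
  const w = t / totalSteps;
  return { identity: w, prompt: 1 - w };
}</code></pre>
            </div>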
    </div>
</div>
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.18262v1" target="_blank" rel="noopener noreferrer">
                UWBench：面向水下理解的综合性视觉语言基准
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>1/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            UWBench: A Comprehensive Vision-Language Benchmark for Underwater Understanding
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Da Zhang, Chenggang Rong, Bingyu Li, Feiyu Wang, Zhiyuan Zhao, Junyu Gao, Xuelon...
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文聚焦于水下视觉语言基准测试，属于纯粹的视觉-语言模型评估领域。虽然标题提及视觉语言模型，但其应用场景（水下理解）与推荐系统、搜索或广告领域毫无关联，且属于基准测试类工作，这在我列出的无关主题中明确排除。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-10-21 03:32:15
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2510.18262v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2510.18262v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Large vision-language models (VLMs) have achieved remarkable success in natural scene understanding, yet their application to underwater environments remains largely unexplored. Underwater imagery presents unique challenges including severe light attenuation, color distortion, and suspended particle scattering, while requiring specialized knowledge of marine ecosystems and organism taxonomy. To bridge this gap, we introduce UWBench, a comprehensive benchmark specifically designed for underwater vision-language understanding. UWBench comprises 15,003 high-resolution underwater images captured across diverse aquatic environments, encompassing oceans, coral reefs, and deep-sea habitats. Each image is enriched with human-verified annotations including 15,281 object referring expressions that precisely describe marine organisms and underwater structures, and 124,983 question-answer pairs covering diverse reasoning capabilities from object recognition to ecological relationship understanding. The dataset captures rich variations in visibility, lighting conditions, and water turbidity, providing a realistic testbed for model evaluation. Based on UWBench, we establish three comprehensive benchmarks: detailed image captioning for generating ecologically informed scene descriptions, visual grounding for precise localization of marine organisms, and visual question answering for multimodal reasoning about underwater environments. Extensive experiments on state-of-the-art VLMs demonstrate that underwater understanding remains challenging, with substantial room for improvement. Our benchmark provides essential resources for advancing vision-language research in underwater contexts and supporting applications in marine science, ecological monitoring, and autonomous underwater exploration. Our code and benchmark will be available.
                </div>
            </details>
    </div>
</div>
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.18256v1" target="_blank" rel="noopener noreferrer">
                利用时序运动先验进行人体网格恢复的双曲空间学习方法
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>1/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            Hyperbolic Space Learning Method Leveraging Temporal Motion Priors for Human Mesh Recovery
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Xiang Zhang, Suping Wu, Weibin Qiu, Zhaocheng Jin, Sheng Yang
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于计算机视觉领域的人体网格恢复任务，涉及双曲空间学习和时序运动建模。虽然技术上有创新性，但属于纯粹的视觉/3D视觉研究，与推荐系统、搜索或广告的核心技术领域没有明显关联，也不涉及Transformer架构或LLM技术的应用。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-10-21 03:26:27
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2510.18256v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2510.18256v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CV</span><span class="category-tag">cs.AI</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    3D human meshes show a natural hierarchical structure (like torso-limbs-fingers). But existing video-based 3D human mesh recovery methods usually learn mesh features in Euclidean space. It's hard to catch this hierarchical structure accurately. So wrong human meshes are reconstructed. To solve this problem, we propose a hyperbolic space learning method leveraging temporal motion prior for recovering 3D human meshes from videos. First, we design a temporal motion prior extraction module. This module extracts the temporal motion features from the input 3D pose sequences and image feature sequences respectively. Then it combines them into the temporal motion prior. In this way, it can strengthen the ability to express features in the temporal motion dimension. Since data representation in non-Euclidean space has been proved to effectively capture hierarchical relationships in real-world datasets (especially in hyperbolic space), we further design a hyperbolic space optimization learning strategy. This strategy uses the temporal motion prior information to assist learning, and uses 3D pose and pose motion information respectively in the hyperbolic space to optimize and learn the mesh features. Then, we combine the optimized results to get an accurate and smooth human mesh. Besides, to make the optimization learning process of human meshes in hyperbolic space stable and effective, we propose a hyperbolic mesh optimization loss. Extensive experimental results on large publicly available datasets indicate superiority in comparison with most state-of-the-art.
                </div>
            </details>
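            <div class="mt-3 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                <p class="mb-2"><strong>编者示意（非论文实现）：</strong>摘要在双曲空间（常用 Poincaré 球模型）中优化网格特征以刻画层级结构。下面给出 Poincaré 球中两点双曲距离标准公式的 JavaScript 实现，仅用于说明"双曲度量"本身，与论文的具体损失设计无关。</p>
                <pre class="overflow-x-auto text-xs"><code>function dot(a, b) { return a.reduce((s, x, i) => s + x * b[i], 0); }
function sqnorm(a) { return dot(a, a); }

// 要求 u、v 的范数均小于 1（即位于单位球内部）
function poincareDistance(u, v) {
  const diff = u.map((x, i) => x - v[i]);
  const num = 2 * sqnorm(diff);
  const den = (1 - sqnorm(u)) * (1 - sqnorm(v));
  return Math.acosh(1 + num / den);
}</code></pre>
            </div>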
    </div>
</div>
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.18213v1" target="_blank" rel="noopener noreferrer">
                EMA-SAM：基于SAM的PTMC分割的指数移动平均方法
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>1/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            EMA-SAM: Exponential Moving-average for SAM-based PTMC Segmentation
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Maryam Dialameh, Hossein Rajabzadeh, Jung Suk Sim, Hyock Ju Kwon
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文标题明确涉及医学图像分割（PTMC可能指甲状腺微小乳头状癌），属于医疗领域的计算机视觉应用。虽然提到了SAM（Segment Anything Model）这一基础模型，但应用场景完全限定在医疗诊断领域，与推荐系统、搜索或广告没有任何关联。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-10-21 01:30:27
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2510.18213v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2510.18213v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Papillary thyroid microcarcinoma (PTMC) is increasingly managed with radio-frequency ablation (RFA), yet accurate lesion segmentation in ultrasound videos remains difficult due to low contrast, probe-induced motion, and heat-related artifacts. The recent Segment Anything Model 2 (SAM-2) generalizes well to static images, but its frame-independent design yields unstable predictions and temporal drift in interventional ultrasound. We introduce EMA-SAM, a lightweight extension of SAM-2 that incorporates a confidence-weighted exponential moving average pointer into the memory bank, providing a stable latent prototype of the tumour across frames. This design preserves temporal coherence through probe pressure and bubble occlusion while rapidly adapting once clear evidence reappears. On our curated PTMC-RFA dataset (124 minutes, 13 patients), EMA-SAM improves maxDice from 0.82 (SAM-2) to 0.86 and maxIoU from 0.72 to 0.76, while reducing false positives by 29%. On external benchmarks, including VTUS and colonoscopy video polyp datasets, EMA-SAM achieves consistent gains of 2-5 Dice points over SAM-2. Importantly, the EMA pointer adds less than 0.1% FLOPs, preserving real-time throughput of about 30 FPS on a single A100 GPU. These results establish EMA-SAM as a robust and efficient framework for stable tumour tracking, bridging the gap between foundation models and the stringent demands of interventional ultrasound. Code is available at https://github.com/mdialameh/EMA-SAM.
                </div>
            </details>
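            <div class="mt-3 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                <p class="mb-2"><strong>编者示意（非论文实现）：</strong>摘要的核心是把置信度加权的指数移动平均（EMA）指针写入记忆库，以获得跨帧稳定的肿瘤原型。下面是该更新规则的一个最小 JavaScript 草图；alpha 与置信度的组合方式为编者假设。</p>
                <pre class="overflow-x-auto text-xs"><code>// prevPointer: 上一帧的 EMA 指针向量；framePointer: 当前帧预测出的指针向量
// confidence ∈ [0, 1]：当前帧越可信，更新越快；遮挡或伪影严重时几乎不更新
function updateEmaPointer(prevPointer, framePointer, confidence, alpha = 0.1) {
  const w = alpha * confidence;
  return prevPointer.map((p, i) => (1 - w) * p + w * framePointer[i]);
}</code></pre>
            </div>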
    </div>
</div>
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.18193v1" target="_blank" rel="noopener noreferrer">
                FST.ai 2.0：一个可解释的AI生态系统，用于奥运和残奥会跆拳道中公平、快速和包容的决策
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>1/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            FST.ai 2.0: An Explainable AI Ecosystem for Fair, Fast, and Inclusive Decision-Making in Olympic and Paralympic Taekwondo
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Keivan Shariatmadar, Ahmad Osman, Ramin Ray, Usman Dildar, Kisam Kim
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文标题明确聚焦于公平性、可解释性和体育领域的特定应用，这些都属于被明确排除的无关主题范畴。标题中提到的'公平'、'包容'和'可解释'直接违反了关于排除公平性、伦理等非技术主题的要求，且应用领域是体育而非推荐系统、搜索或广告。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-10-21 00:35:56
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2510.18193v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2510.18193v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.AI</span><span class="category-tag">cs.CV</span><span class="category-tag">cs.LG</span><span class="category-tag">stat.ML</span><span class="category-tag">68T01</span><span class="category-tag">I.2.8</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Fair, transparent, and explainable decision-making remains a critical challenge in Olympic and Paralympic combat sports. This paper presents FST.ai 2.0, an explainable AI ecosystem designed to support referees, coaches, and athletes in real time during Taekwondo competitions and training. The system integrates pose-based action recognition using graph convolutional networks (GCNs), epistemic uncertainty modeling through credal sets, and explainability overlays for visual decision support. A set of interactive dashboards enables human-AI collaboration in referee evaluation, athlete performance analysis, and Para-Taekwondo classification. Beyond automated scoring, FST.ai 2.0 incorporates modules for referee training, fairness monitoring, and policy-level analytics within the World Taekwondo ecosystem. Experimental validation on competition data demonstrates an 85% reduction in decision review time and 93% referee trust in AI-assisted decisions. The framework thus establishes a transparent and extensible pipeline for trustworthy, data-driven officiating and athlete assessment. By bridging real-time perception, explainable inference, and governance-aware design, FST.ai 2.0 represents a step toward equitable, accountable, and human-aligned AI in sports.
                </div>
            </details>
    </div>
</div>
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.18189v1" target="_blank" rel="noopener noreferrer">
                一种可泛化的光传输三维嵌入方法，用于全局光照
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>1/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            A Generalizable Light Transport 3D Embedding for Global Illumination
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Bing Xu, Mukund Varma T, Cheng Wang, Tzumao Li, Lifan Wu, Bartlomiej Wronski, Ra...
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">这篇论文专注于计算机图形学中的光传输和3D渲染技术，属于纯粹的视觉/图形领域。虽然标题提到'嵌入'概念，但这是针对物理光照模型的嵌入表示，与推荐系统、搜索或广告中的用户/item嵌入完全不同。该技术没有明显的应用场景可以转移到RecSys/Search/Ads领域。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-10-21 00:29:09
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2510.18189v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2510.18189v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.GR</span><span class="category-tag">cs.CV</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Global illumination (GI) is essential for realistic rendering but remains computationally expensive due to the complexity of simulating indirect light transport. Recent neural methods have mainly relied on per-scene optimization, sometimes extended to handle changes in camera or geometry. Efforts toward cross-scene generalization have largely stayed in 2D screen space, such as neural denoising or G-buffer based GI prediction, which often suffer from view inconsistency and limited spatial understanding. We propose a generalizable 3D light transport embedding that approximates global illumination directly from 3D scene configurations, without using rasterized or path-traced cues. Each scene is represented as a point cloud with geometric and material features. A scalable transformer models global point-to-point interactions to encode these features into neural primitives. At render time, each query point retrieves nearby primitives via nearest-neighbor search and aggregates their latent features through cross-attention to predict the desired rendering quantity. We demonstrate results on diffuse global illumination prediction across diverse indoor scenes with varying layouts, geometry, and materials. The embedding trained for irradiance estimation can be quickly adapted to new rendering tasks with limited fine-tuning. We also present preliminary results for spatial-directional radiance field estimation for glossy materials and show how the normalized field can accelerate unbiased path guiding. This approach highlights a path toward integrating learned priors into rendering pipelines without explicit ray-traced illumination cues.
                </div>
            </details>
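            <div class="mt-3 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                <p class="mb-2"><strong>编者示意（非论文实现）：</strong>摘要描述渲染时查询点先做最近邻检索，再用交叉注意力聚合邻近 neural primitives 的潜特征。下面用 JavaScript 示意"打分、softmax、加权求和"的聚合骨架；打分方式与数据结构均为编者假设。</p>
                <pre class="overflow-x-auto text-xs"><code>function softmax(xs) {
  const m = Math.max(...xs);
  const es = xs.map(x => Math.exp(x - m));
  const s = es.reduce((a, b) => a + b, 0);
  return es.map(e => e / s);
}

// neighbors: 最近邻搜索得到的 [{ feat: number[], value: number[] }, ...]
function aggregateNeighbors(queryFeat, neighbors) {
  const scores = neighbors.map(n =>
    n.feat.reduce((s, x, i) => s + x * queryFeat[i], 0)); // 点积打分
  const weights = softmax(scores);
  const out = new Array(neighbors[0].value.length).fill(0);
  neighbors.forEach((n, k) => {
    n.value.forEach((v, i) => { out[i] += weights[k] * v; }); // 注意力加权聚合
  });
  return out;
}</code></pre>
            </div>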
    </div>
</div>
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.18188v1" target="_blank" rel="noopener noreferrer">
                RadDiagSeg-M：用于放射学中联合诊断和多目标分割的视觉语言模型
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>1/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            RadDiagSeg-M: A Vision Language Model for Joint Diagnosis and Multi-Target Segmentation in Radiology
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Chengrun Li, Corentin Royer, Haozhe Luo, Bastian Wittmann, Xia Li, Ibrahim Hamam...
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于医学放射学领域的视觉语言模型应用，涉及诊断和图像分割任务。这属于医学领域的特定应用，与搜索、推荐、广告系统无关，且不涉及任何可能应用于这些领域的核心技术进展。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-10-21 00:28:13
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2510.18188v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2510.18188v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CV</span><span class="category-tag">cs.AI</span><span class="category-tag">68</span><span class="category-tag">I.4.6</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Most current medical vision language models struggle to jointly generate diagnostic text and pixel-level segmentation masks in response to complex visual questions. This represents a major limitation towards clinical application, as assistive systems that fail to provide both modalities simultaneously offer limited value to medical practitioners. To alleviate this limitation, we first introduce RadDiagSeg-D, a dataset combining abnormality detection, diagnosis, and multi-target segmentation into a unified and hierarchical task. RadDiagSeg-D covers multiple imaging modalities and is precisely designed to support the development of models that produce descriptive text and corresponding segmentation masks in tandem. Subsequently, we leverage the dataset to propose a novel vision-language model, RadDiagSeg-M, capable of joint abnormality detection, diagnosis, and flexible segmentation. RadDiagSeg-M provides highly informative and clinically useful outputs, effectively addressing the need to enrich contextual information for assistive diagnosis. Finally, we benchmark RadDiagSeg-M and showcase its strong performance across all components involved in the task of multi-target text-and-mask generation, establishing a robust and competitive baseline.
                </div>
            </details>
    </div>
</div>
        </div>
    </main>

    <!-- 加载论文数据和JavaScript逻辑 -->
    <script src="static/app.js"></script>

    <script>
        document.addEventListener('DOMContentLoaded', function() {
            // 在精选论文和普通论文之间添加展开/折叠按钮
            const papersContainer = document.querySelector('#papers-container');
            if (papersContainer) {
                // 添加展开/折叠全部按钮
                const expandAllButton = document.createElement('div');
                expandAllButton.className = 'expand-toggle';
                expandAllButton.textContent = '展开全部非精选论文';
                expandAllButton.addEventListener('click', function() {
                    papersContainer.classList.toggle('expanded-all');
                    this.textContent = papersContainer.classList.contains('expanded-all') ? 
                        '收起全部非精选论文' : '展开全部非精选论文';
                    
                    // 更新所有论文标题前的图标状态
                    const collapsedPapers = papersContainer.querySelectorAll('.collapsed-level-1');
                    collapsedPapers.forEach(paper => {
                        const iconElement = paper.querySelector('.expand-icon');
                        if (iconElement) {
                            iconElement.className = papersContainer.classList.contains('expanded-all') ? 
                                'expand-icon fa fa-eye' : 'expand-icon fa fa-eye-slash';
                        }
                    });
                });
                
                // 找到第一个非精选论文的位置
                const firstNormalPaper = papersContainer.querySelector('.simple-paper-card');
                if (firstNormalPaper) {
                    papersContainer.insertBefore(expandAllButton, firstNormalPaper);
                }
                
                // 添加分割线用于展开分数<=1的论文
                const divider = document.createElement('div');
                divider.className = 'papers-divider';
                
                const dividerLabel = document.createElement('div');
                dividerLabel.className = 'papers-divider-label';
                dividerLabel.textContent = '点击展开更多论文（评分较低）';
                dividerLabel.addEventListener('click', function() {
                    papersContainer.classList.toggle('expanded-level-2');
                    this.textContent = papersContainer.classList.contains('expanded-level-2') ? 
                        '点击收起低分论文' : '点击展开更多论文（评分较低）';
                });
                
                divider.appendChild(dividerLabel);
                
                // 在所有非精选论文的最后一个元素后面添加分割线
                const normalPapers = papersContainer.querySelectorAll('.simple-paper-card');
                if (normalPapers.length > 0) {
                    const lastNormalPaper = normalPapers[normalPapers.length - 1];
                    papersContainer.insertBefore(divider, lastNormalPaper.nextSibling);
                }
            }
            
            // 为每个非精选论文添加点击标题展开/折叠详情的功能
            const collapsedPapers = document.querySelectorAll('.collapsed-level-1');
            collapsedPapers.forEach(paper => {
                const titleElement = paper.querySelector('h3');
                if (titleElement) {
                    titleElement.style.cursor = 'pointer';
                    
                    // 创建展开/折叠图标元素并设置样式
                    const iconElement = document.createElement('i');
                    iconElement.className = 'expand-icon fa fa-eye-slash cursor-pointer';
                    iconElement.style.marginRight = '8px';
                    
                    // 将图标插入到标题链接之前，作为同级元素
                    const linkElement = titleElement.querySelector('a');
                    if (linkElement) {
                        // 将图标直接添加到标题元素中，位于链接之前
                        titleElement.insertBefore(iconElement, linkElement);
                        
                        // 为图标单独添加点击事件处理展开/折叠
                        iconElement.addEventListener('click', function(e) {
                            e.stopPropagation(); // 阻止事件冒泡到标题元素
                            const details = paper.querySelector('.paper-details');
                            if (details) {
                                const isExpanded = details.style.display === 'block';
                                details.style.display = isExpanded ? 'none' : 'block';
                                
                                // 更新图标状态
                                this.className = isExpanded ? 
                                    'expand-icon fa fa-eye-slash cursor-pointer' : 'expand-icon fa fa-eye cursor-pointer';
                                this.style.marginRight = '8px';
                            }
                        });
                    }
                    
                    // 为标题元素添加点击事件，也可以展开/折叠，但会检查点击目标
                    titleElement.addEventListener('click', function(e) {
                        // 仅当点击的是标题本身（非链接、非图标）时才展开/折叠
                        if (!e.target.closest('a') && !e.target.closest('.expand-icon')) {
                            const details = paper.querySelector('.paper-details');
                            if (details) {
                                const isExpanded = details.style.display === 'block';
                                details.style.display = isExpanded ? 'none' : 'block';
                                
                                // 更新图标状态
                                const iconElement = this.querySelector('.expand-icon');
                                if (iconElement) {
                                    iconElement.className = isExpanded ? 
                                        'expand-icon fa fa-eye-slash cursor-pointer' : 'expand-icon fa fa-eye cursor-pointer';
                                    iconElement.style.marginRight = '8px';
                                }
                            }
                        }
                    });
                }
            });
            
            // 实现"仅显示精选"按钮功能
            const showSelectedButton = document.getElementById('show-selected');
            if (showSelectedButton) {
                showSelectedButton.addEventListener('click', function() {
                    // 显示所有精选论文，隐藏所有普通论文
                    const selectedPapers = document.querySelectorAll('.paper-card');
                    const normalPapers = document.querySelectorAll('.simple-paper-card');
                    
                    selectedPapers.forEach(paper => {
                        paper.style.display = 'block';
                    });
                    
                    normalPapers.forEach(paper => {
                        paper.style.display = 'none';
                    });
                    
                    // 更新显示计数
                    const displayCountElement = document.getElementById('display-count');
                    if (displayCountElement) {
                        displayCountElement.textContent = `显示 ${selectedPapers.length} 篇论文 (共 ${selectedPapers.length + normalPapers.length} 篇)`;
                    }
                    
                    // 更新按钮样式
                    this.className = 'px-3 py-1 bg-primary text-white rounded text-sm hover:bg-primary/90 transition-colors';
                    document.getElementById('show-all').className = 'px-3 py-1 bg-gray-200 text-gray-700 rounded text-sm hover:bg-gray-300 transition-colors';
                    
                    // 隐藏展开/折叠按钮和分割线
                    const expandToggle = document.querySelector('.expand-toggle');
                    if (expandToggle) expandToggle.style.display = 'none';
                    
                    const papersDivider = document.querySelector('.papers-divider');
                    if (papersDivider) papersDivider.style.display = 'none';
                });
            }
            
            // 实现"全部论文"按钮功能
            const showAllButton = document.getElementById('show-all');
            if (showAllButton) {
                showAllButton.addEventListener('click', function() {
                    // 显示所有论文
                    const allPapers = document.querySelectorAll('.paper-card, .simple-paper-card');
                    allPapers.forEach(paper => {
                        paper.style.display = 'block';
                    });
                    
                    // 重置折叠状态
                    papersContainer.classList.remove('expanded-all');
                    
                    // 更新显示计数
                    const displayCountElement = document.getElementById('display-count');
                    if (displayCountElement) {
                        displayCountElement.textContent = `显示 ${allPapers.length} 篇论文 (共 ${allPapers.length} 篇)`;
                    }
                    
                    // 更新按钮样式
                    this.className = 'px-3 py-1 bg-primary text-white rounded text-sm hover:bg-primary/90 transition-colors';
                    document.getElementById('show-selected').className = 'px-3 py-1 bg-gray-200 text-gray-700 rounded text-sm hover:bg-gray-300 transition-colors';
                    
                    // 重新显示展开/折叠按钮和分割线
                    const expandToggle = document.querySelector('.expand-toggle');
                    if (expandToggle) {
                        expandToggle.style.display = 'block';
                        expandToggle.textContent = '展开全部非精选论文';
                    }
                    
                    const papersDivider = document.querySelector('.papers-divider');
                    if (papersDivider) papersDivider.style.display = 'block';
                });
            }
        });
    </script>
    <script>
    
    // 初始化日历
    document.addEventListener('DOMContentLoaded', () => {
        try {
            console.log('Attempting to initialize calendar...');
            initCalendar();
        } catch (error) {
            console.error('Error initializing calendar:', error);
        }
    });
    
    // 日历初始化函数
    function initCalendar() {
        const toggleBtn = document.getElementById('date-picker-toggle');
        const datePicker = document.getElementById('date-picker');
        const calendarGrid = document.getElementById('calendar-grid');
        const prevMonthBtn = document.getElementById('prev-month');
        const nextMonthBtn = document.getElementById('next-month');
        const currentMonthEl = document.getElementById('current-month');
        const selectedDateText = document.getElementById('selected-date-text');
        
        // 当前显示的日期（从页面获取；兼容 YYYY-MM-DD 与 YYYY年MM月DD日 两种格式）
        const currentDateStr = document.getElementById('current-date').textContent.trim()
            .replace(/[年月]/g, '-').replace(/日/g, '');
        const currentDate = new Date(currentDateStr);
        let displayYear = currentDate.getFullYear();
        let displayMonth = currentDate.getMonth();
        
        // 有论文数据的日期列表
        const availableDates = ["20251014","20251022","20251023","20251015","20251024","20251009","20251010","20251016","20251021","20251017"];
        
        // 尝试从localStorage恢复选择状态
        const savedDate = localStorage.getItem('selectedDate');
        const savedYear = localStorage.getItem('selectedYear');
        const savedMonth = localStorage.getItem('selectedMonth');
        
        // 确保页面加载时显示当前选中的日期
        // 修复持久化问题：确保每次加载都能正确恢复选中状态
        if (savedDate) {
            selectedDateText.textContent = savedDate;
            if (savedYear) displayYear = parseInt(savedYear);
            if (savedMonth) displayMonth = parseInt(savedMonth);
        } else {
            // 首次加载时，将当前页面日期保存到localStorage
            const currentPageDate = currentDateStr.replace(/\//g, '-');
            selectedDateText.textContent = currentPageDate;
            localStorage.setItem('selectedDate', currentPageDate);
            localStorage.setItem('selectedYear', currentDate.getFullYear().toString());
            localStorage.setItem('selectedMonth', currentDate.getMonth().toString());
        }
    
        // 切换日历显示状态
        toggleBtn.addEventListener('click', (e) => {
            e.stopPropagation();
            
            // 显式控制hidden类的添加和移除
            if (datePicker.classList.contains('hidden')) {
                // 显示日历 - 确保移除hidden类
                datePicker.classList.remove('hidden');
                renderCalendar();
            } else {
                // 隐藏日历
                datePicker.classList.add('hidden');
            }
        });
        
        // 点击其他区域关闭日历
        document.addEventListener('click', () => {
            if (!datePicker.classList.contains('hidden')) {
                datePicker.classList.add('hidden');
            }
        });
        
        // 阻止日历内部点击事件冒泡
        datePicker.addEventListener('click', (e) => {
            e.stopPropagation();
        });
        
        // 上月和下月按钮
        prevMonthBtn.addEventListener('click', () => {
            displayMonth--;
            if (displayMonth < 0) {
                displayMonth = 11;
                displayYear--;
            }
            renderCalendar();
        });
        
        nextMonthBtn.addEventListener('click', () => {
            displayMonth++;
            if (displayMonth > 11) {
                displayMonth = 0;
                displayYear++;
            }
            renderCalendar();
        });
        
        /**
         * 渲染日历
         */
        function renderCalendar() {
            // 清空日历网格
            calendarGrid.innerHTML = '';
            
            // 更新当前月份显示
            const monthNames = ['1月', '2月', '3月', '4月', '5月', '6月', '7月', '8月', '9月', '10月', '11月', '12月'];
            currentMonthEl.textContent = displayYear + '年' + monthNames[displayMonth];
            
            // 计算当前月份的第一天是星期几
            const firstDay = new Date(displayYear, displayMonth, 1);
            const firstDayOfWeek = firstDay.getDay();
            
            // 计算当前月份的天数
            const daysInMonth = new Date(displayYear, displayMonth + 1, 0).getDate();
            
            // 添加上月的占位天数
            for (let i = 0; i < firstDayOfWeek; i++) {
                const emptyDay = document.createElement('div');
                emptyDay.classList.add('py-1', 'text-gray-300');
                calendarGrid.appendChild(emptyDay);
            }
            
            // 获取当前日期（用于高亮显示）
            const today = new Date();
            today.setHours(0, 0, 0, 0);
            
            // 添加当前月份的天数
            for (let day = 1; day <= daysInMonth; day++) {
                const dayElement = document.createElement('div');
                const currentDateObj = new Date(displayYear, displayMonth, day);
                const dateStr = displayYear + String(displayMonth + 1).padStart(2, '0') + String(day).padStart(2, '0');
                const displayDateStr = displayYear + '-' + String(displayMonth + 1).padStart(2, '0') + '-' + String(day).padStart(2, '0');
                
                // 设置日期元素基本样式
                dayElement.textContent = day;
                
                // 检查该日期是否有论文数据
                const hasPapers = availableDates.includes(dateStr);
                
                if (hasPapers) {
                    // 有论文数据的日期样式
                    dayElement.classList.add('py-1', 'cursor-pointer', 'hover:bg-gray-100', 'rounded', 'bg-blue-50', 'font-medium');
                    
                    // 添加点击事件，跳转到对应日期的页面
                    dayElement.addEventListener('click', () => {
                        console.log('Date clicked:', displayDateStr);
                        selectedDateText.textContent = displayDateStr;
                        
                        // 保存选择状态到localStorage
                        localStorage.setItem('selectedDate', displayDateStr);
                        localStorage.setItem('selectedYear', displayYear.toString());
                        localStorage.setItem('selectedMonth', displayMonth.toString());
                        
                        datePicker.classList.add('hidden');
                        
                        // 构造目标URL并跳转
                        const targetUrl = 'arxiv_' + dateStr + '.html';
                        window.location.href = targetUrl;
                    });
                } else {
                    // 没有论文数据的日期样式（置灰不可点击）
                    dayElement.classList.add('py-1', 'text-gray-400', 'cursor-not-allowed');
                }
                
                // 高亮显示当天日期（覆盖之前的样式）
                if (currentDateObj.getTime() === today.getTime()) {
                    dayElement.classList.remove('bg-blue-50');
                    dayElement.classList.add('bg-primary', 'text-white', 'font-bold', 'shadow');
                    if (!hasPapers) {
                        // 当天没有论文时，仍然置灰但保持背景色
                        dayElement.classList.add('opacity-70');
                    }
                }
                
                // 高亮显示当前选中的日期
                if (displayDateStr === selectedDateText.textContent) {
                    dayElement.classList.add('font-bold', 'border-2', 'border-primary', 'rounded-lg', 'shadow-md');
                }
                
                // 增强有论文数据的日期样式，使其更明显
                if (hasPapers && currentDateObj.getTime() !== today.getTime()) {
                    dayElement.classList.add('bg-blue-100', 'hover:bg-blue-200', 'transition-colors', 'duration-200');
                }
                
                calendarGrid.appendChild(dayElement);
            }
        }
    }
    </script>
    </body>

</html>